1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
37b411b363SPhilipp Reisner #include "drbd_int.h"
38a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner #include "drbd_vli.h"
41b411b363SPhilipp Reisner 
42f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4320c68fdeSLars Ellenberg 
/* Decoded header of one packet received from the peer. */
4477351055SPhilipp Reisner struct packet_info {
4577351055SPhilipp Reisner 	enum drbd_packet cmd;	/* packet command code */
46e2857216SAndreas Gruenbacher 	unsigned int size;	/* size of the packet's payload */
47e2857216SAndreas Gruenbacher 	unsigned int vnr;	/* volume number the packet refers to */
48e658983aSAndreas Gruenbacher 	void *data;		/* received packet data (opaque at this level) */
4977351055SPhilipp Reisner };
5077351055SPhilipp Reisner 
/* Return values of drbd_may_finish_epoch(), describing the epoch's fate. */
51b411b363SPhilipp Reisner enum finish_epoch {
52b411b363SPhilipp Reisner 	FE_STILL_LIVE,
53b411b363SPhilipp Reisner 	FE_DESTROYED,
54b411b363SPhilipp Reisner 	FE_RECYCLED,
55b411b363SPhilipp Reisner };
56b411b363SPhilipp Reisner 
57bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
58bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5969a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
60a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
61bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6299920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
66b411b363SPhilipp Reisner 
6745bb912bSLars Ellenberg /*
6845bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6945bb912bSLars Ellenberg  * page->private being our "next" pointer.
7045bb912bSLars Ellenberg  */
7145bb912bSLars Ellenberg 
7245bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7345bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
7445bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
7545bb912bSLars Ellenberg  */
7645bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
7745bb912bSLars Ellenberg {
7845bb912bSLars Ellenberg 	struct page *page;
7945bb912bSLars Ellenberg 	struct page *tmp;
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg 	BUG_ON(!n);
8245bb912bSLars Ellenberg 	BUG_ON(!head);
8345bb912bSLars Ellenberg 
8445bb912bSLars Ellenberg 	page = *head;
8523ce4227SPhilipp Reisner 
8623ce4227SPhilipp Reisner 	if (!page)
8723ce4227SPhilipp Reisner 		return NULL;
8823ce4227SPhilipp Reisner 
	/* Walk to the n-th page; after the loop, tmp points one past it. */
8945bb912bSLars Ellenberg 	while (page) {
9045bb912bSLars Ellenberg 		tmp = page_chain_next(page);
9145bb912bSLars Ellenberg 		if (--n == 0)
9245bb912bSLars Ellenberg 			break; /* found sufficient pages */
9345bb912bSLars Ellenberg 		if (tmp == NULL)
9445bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
9545bb912bSLars Ellenberg 			return NULL;
9645bb912bSLars Ellenberg 		page = tmp;
9745bb912bSLars Ellenberg 	}
9845bb912bSLars Ellenberg 
9945bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10045bb912bSLars Ellenberg 	set_page_private(page, 0);
10145bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
10245bb912bSLars Ellenberg 	page = *head;
10345bb912bSLars Ellenberg 	*head = tmp;	/* tmp == first page NOT taken (may be NULL) */
10445bb912bSLars Ellenberg 	return page;
10545bb912bSLars Ellenberg }
10645bb912bSLars Ellenberg 
10745bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10845bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10945bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
/* If @len is non-NULL, it receives the number of pages in the chain. */
11045bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11145bb912bSLars Ellenberg {
11245bb912bSLars Ellenberg 	struct page *tmp;
11345bb912bSLars Ellenberg 	int i = 1;
11445bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page)))
11545bb912bSLars Ellenberg 		++i, page = tmp;
11645bb912bSLars Ellenberg 	if (len)
11745bb912bSLars Ellenberg 		*len = i;
11845bb912bSLars Ellenberg 	return page;
11945bb912bSLars Ellenberg }
12045bb912bSLars Ellenberg 
/* Drop a reference on every page of the chain (put_page);
 * returns the number of pages processed. */
12145bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12245bb912bSLars Ellenberg {
12345bb912bSLars Ellenberg 	struct page *tmp;
12445bb912bSLars Ellenberg 	int i = 0;
12545bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12645bb912bSLars Ellenberg 		put_page(page);
12745bb912bSLars Ellenberg 		++i;
12845bb912bSLars Ellenberg 	}
12945bb912bSLars Ellenberg 	return i;
13045bb912bSLars Ellenberg }
13145bb912bSLars Ellenberg 
/* Prepend the chain chain_first..chain_last to *head.
 * Locking *head is the caller's responsibility (see drbd_pp_lock users). */
13245bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13345bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13445bb912bSLars Ellenberg {
13545bb912bSLars Ellenberg #if 1
	/* paranoia: verify that chain_last really is the tail of chain_first */
13645bb912bSLars Ellenberg 	struct page *tmp;
13745bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
13845bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
13945bb912bSLars Ellenberg #endif
14045bb912bSLars Ellenberg 
14145bb912bSLars Ellenberg 	/* add chain to head */
14245bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14345bb912bSLars Ellenberg 	*head = chain_first;
14445bb912bSLars Ellenberg }
14545bb912bSLars Ellenberg 
/* Grab @number pages: first from the global drbd_pp_pool (the unlocked
 * drbd_pp_vacant check is racy but re-validated under drbd_pp_lock),
 * else one by one from the page allocator with GFP_TRY.  Returns a page
 * chain on success; if not all pages could be obtained, any partially
 * allocated chain is given back to the pool and NULL is returned. */
146b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device,
14718c2d522SAndreas Gruenbacher 				       unsigned int number)
148b411b363SPhilipp Reisner {
149b411b363SPhilipp Reisner 	struct page *page = NULL;
15045bb912bSLars Ellenberg 	struct page *tmp = NULL;
15118c2d522SAndreas Gruenbacher 	unsigned int i = 0;
152b411b363SPhilipp Reisner 
153b411b363SPhilipp Reisner 	/* Yes, testing drbd_pp_vacant outside the lock is racy.
154b411b363SPhilipp Reisner 	 * So what. It saves a spin_lock. */
15545bb912bSLars Ellenberg 	if (drbd_pp_vacant >= number) {
156b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
15745bb912bSLars Ellenberg 		page = page_chain_del(&drbd_pp_pool, number);
15845bb912bSLars Ellenberg 		if (page)
15945bb912bSLars Ellenberg 			drbd_pp_vacant -= number;
160b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
16145bb912bSLars Ellenberg 		if (page)
16245bb912bSLars Ellenberg 			return page;
163b411b363SPhilipp Reisner 	}
16445bb912bSLars Ellenberg 
165b411b363SPhilipp Reisner 	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
166b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
167b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
16845bb912bSLars Ellenberg 	for (i = 0; i < number; i++) {
16945bb912bSLars Ellenberg 		tmp = alloc_page(GFP_TRY);
17045bb912bSLars Ellenberg 		if (!tmp)
17145bb912bSLars Ellenberg 			break;
		/* link the new page in front of the chain built so far */
17245bb912bSLars Ellenberg 		set_page_private(tmp, (unsigned long)page);
17345bb912bSLars Ellenberg 		page = tmp;
17445bb912bSLars Ellenberg 	}
17545bb912bSLars Ellenberg 
17645bb912bSLars Ellenberg 	if (i == number)
177b411b363SPhilipp Reisner 		return page;
17845bb912bSLars Ellenberg 
17945bb912bSLars Ellenberg 	/* Not enough pages immediately available this time.
180c37c8ecfSAndreas Gruenbacher 	 * No need to jump around here, drbd_alloc_pages will retry this
18145bb912bSLars Ellenberg 	 * function "soon". */
18245bb912bSLars Ellenberg 	if (page) {
18345bb912bSLars Ellenberg 		tmp = page_chain_tail(page, NULL);
18445bb912bSLars Ellenberg 		spin_lock(&drbd_pp_lock);
18545bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
18645bb912bSLars Ellenberg 		drbd_pp_vacant += i;
18745bb912bSLars Ellenberg 		spin_unlock(&drbd_pp_lock);
18845bb912bSLars Ellenberg 	}
18945bb912bSLars Ellenberg 	return NULL;
190b411b363SPhilipp Reisner }
191b411b363SPhilipp Reisner 
/* Move those net_ee entries whose pages the network stack no longer
 * references onto @to_be_freed.  Caller must hold resource->req_lock. */
192b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
193a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
194b411b363SPhilipp Reisner {
195a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *tmp;
196b411b363SPhilipp Reisner 
197b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
198b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
199b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
200b411b363SPhilipp Reisner 	   stop to examine the list... */
201b411b363SPhilipp Reisner 
202a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
203045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
204b411b363SPhilipp Reisner 			break;
205a8cd15baSAndreas Gruenbacher 		list_move(&peer_req->w.list, to_be_freed);
206b411b363SPhilipp Reisner 	}
207b411b363SPhilipp Reisner }
208b411b363SPhilipp Reisner 
/* Collect finished net_ee peer requests under the req_lock,
 * then free them outside the lock. */
209668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
210b411b363SPhilipp Reisner {
211b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
212db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
213b411b363SPhilipp Reisner 
2140500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
215b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2160500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
217a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
218b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
219b411b363SPhilipp Reisner }
220b411b363SPhilipp Reisner 
/* Run drbd_reclaim_net_peer_reqs() on each device of @connection that
 * still has pages accounted to the network stack (pp_in_use_by_net).
 * The RCU read-side section protecting the idr walk is dropped (and
 * re-entered) around the actual reclaim; a kref keeps the device alive
 * in the meantime. */
221668700b4SPhilipp Reisner static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
222668700b4SPhilipp Reisner {
223668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device;
224668700b4SPhilipp Reisner 	int vnr;
225668700b4SPhilipp Reisner 
226668700b4SPhilipp Reisner 	rcu_read_lock();
227668700b4SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
228668700b4SPhilipp Reisner 		struct drbd_device *device = peer_device->device;
229668700b4SPhilipp Reisner 		if (!atomic_read(&device->pp_in_use_by_net))
230668700b4SPhilipp Reisner 			continue;
231668700b4SPhilipp Reisner 
232668700b4SPhilipp Reisner 		kref_get(&device->kref);
233668700b4SPhilipp Reisner 		rcu_read_unlock();
234668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
235668700b4SPhilipp Reisner 		kref_put(&device->kref, drbd_destroy_device);
236668700b4SPhilipp Reisner 		rcu_read_lock();
237668700b4SPhilipp Reisner 	}
238668700b4SPhilipp Reisner 	rcu_read_unlock();
239668700b4SPhilipp Reisner }
240668700b4SPhilipp Reisner 
241b411b363SPhilipp Reisner /**
242c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
243b30ab791SAndreas Gruenbacher  * @peer_device:	DRBD peer device.
24445bb912bSLars Ellenberg  * @number:	number of pages requested
24545bb912bSLars Ellenberg  * @retry:	whether to retry, if not enough pages are available right now
246b411b363SPhilipp Reisner  *
24745bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2480e49d7b0SLars Ellenberg  * the kernel.
24945bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25045bb912bSLars Ellenberg  *
2510e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2520e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2530e49d7b0SLars Ellenberg  *
2540e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2550e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2560e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2570e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2580e49d7b0SLars Ellenberg  *
25945bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
260b411b363SPhilipp Reisner  */
26169a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
262c37c8ecfSAndreas Gruenbacher 			      bool retry)
263b411b363SPhilipp Reisner {
26469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
265b411b363SPhilipp Reisner 	struct page *page = NULL;
26644ed167dSPhilipp Reisner 	struct net_conf *nc;
267b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
2680e49d7b0SLars Ellenberg 	unsigned int mxb;
269b411b363SPhilipp Reisner 
	/* snapshot max_buffers; fall back to a huge value if not configured */
27044ed167dSPhilipp Reisner 	rcu_read_lock();
27169a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
27244ed167dSPhilipp Reisner 	mxb = nc ? nc->max_buffers : 1000000;
27344ed167dSPhilipp Reisner 	rcu_read_unlock();
27444ed167dSPhilipp Reisner 
275b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->pp_in_use) < mxb)
276b30ab791SAndreas Gruenbacher 		page = __drbd_alloc_pages(device, number);
277b411b363SPhilipp Reisner 
278668700b4SPhilipp Reisner 	/* Try to keep the fast path fast, but occasionally we need
279668700b4SPhilipp Reisner 	 * to reclaim the pages we lent to the network stack. */
280668700b4SPhilipp Reisner 	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
281668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
282668700b4SPhilipp Reisner 
	/* Slow path: wait (interruptibly) until enough pages are freed. */
28345bb912bSLars Ellenberg 	while (page == NULL) {
284b411b363SPhilipp Reisner 		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
285b411b363SPhilipp Reisner 
286668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
287b411b363SPhilipp Reisner 
288b30ab791SAndreas Gruenbacher 		if (atomic_read(&device->pp_in_use) < mxb) {
289b30ab791SAndreas Gruenbacher 			page = __drbd_alloc_pages(device, number);
290b411b363SPhilipp Reisner 			if (page)
291b411b363SPhilipp Reisner 				break;
292b411b363SPhilipp Reisner 		}
293b411b363SPhilipp Reisner 
294b411b363SPhilipp Reisner 		if (!retry)
295b411b363SPhilipp Reisner 			break;
296b411b363SPhilipp Reisner 
297b411b363SPhilipp Reisner 		if (signal_pending(current)) {
298d0180171SAndreas Gruenbacher 			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
299b411b363SPhilipp Reisner 			break;
300b411b363SPhilipp Reisner 		}
301b411b363SPhilipp Reisner 
3020e49d7b0SLars Ellenberg 		if (schedule_timeout(HZ/10) == 0)
3030e49d7b0SLars Ellenberg 			mxb = UINT_MAX;	/* timeout expired: stop throttling on max-buffers */
304b411b363SPhilipp Reisner 	}
305b411b363SPhilipp Reisner 	finish_wait(&drbd_pp_wait, &wait);
306b411b363SPhilipp Reisner 
30745bb912bSLars Ellenberg 	if (page)
308b30ab791SAndreas Gruenbacher 		atomic_add(number, &device->pp_in_use);
309b411b363SPhilipp Reisner 	return page;
310b411b363SPhilipp Reisner }
311b411b363SPhilipp Reisner 
312c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3130500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31445bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31545bb912bSLars Ellenberg  * or returns all pages to the system. */
/* @is_net selects which in-use counter to drop: pp_in_use_by_net vs pp_in_use. */
316b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
317b411b363SPhilipp Reisner {
318b30ab791SAndreas Gruenbacher 	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
319b411b363SPhilipp Reisner 	int i;
320435f0740SLars Ellenberg 
321a73ff323SLars Ellenberg 	if (page == NULL)
322a73ff323SLars Ellenberg 		return;
323a73ff323SLars Ellenberg 
	/* pool holds enough vacant pages already: give these back to the system */
324183ece30SRoland Kammerer 	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32545bb912bSLars Ellenberg 		i = page_chain_free(page);
32645bb912bSLars Ellenberg 	else {
32745bb912bSLars Ellenberg 		struct page *tmp;
32845bb912bSLars Ellenberg 		tmp = page_chain_tail(page, &i);
329b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
33045bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
33145bb912bSLars Ellenberg 		drbd_pp_vacant += i;
332b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
333b411b363SPhilipp Reisner 	}
334435f0740SLars Ellenberg 	i = atomic_sub_return(i, a);
33545bb912bSLars Ellenberg 	if (i < 0)
336d0180171SAndreas Gruenbacher 		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
337435f0740SLars Ellenberg 			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
338b411b363SPhilipp Reisner 	wake_up(&drbd_pp_wait);
339b411b363SPhilipp Reisner }
340b411b363SPhilipp Reisner 
341b411b363SPhilipp Reisner /*
342b411b363SPhilipp Reisner You need to hold the req_lock:
343b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
344b411b363SPhilipp Reisner 
345b411b363SPhilipp Reisner You must not have the req_lock:
3463967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3470db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3487721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
349b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
350a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
351b411b363SPhilipp Reisner  drbd_clear_done_ee()
352b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
353b411b363SPhilipp Reisner */
354b411b363SPhilipp Reisner 
3559104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size)
3569104d31aSLars Ellenberg  * w_same: payload_size == logical_block_size
3579104d31aSLars Ellenberg  * trim: payload_size == 0 */
/* Allocate a peer request from drbd_ee_mempool and, if @payload_size > 0,
 * a page chain via drbd_alloc_pages().  Returns NULL on fault injection
 * or allocation failure. */
358f6ffca9fSAndreas Gruenbacher struct drbd_peer_request *
35969a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
3609104d31aSLars Ellenberg 		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
361b411b363SPhilipp Reisner {
36269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
363db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
364a73ff323SLars Ellenberg 	struct page *page = NULL;
	/* round the payload up to whole pages */
3659104d31aSLars Ellenberg 	unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
366b411b363SPhilipp Reisner 
367b30ab791SAndreas Gruenbacher 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
368b411b363SPhilipp Reisner 		return NULL;
369b411b363SPhilipp Reisner 
3700892fac8SKent Overstreet 	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
371db830c46SAndreas Gruenbacher 	if (!peer_req) {
372b411b363SPhilipp Reisner 		if (!(gfp_mask & __GFP_NOWARN))
373d0180171SAndreas Gruenbacher 			drbd_err(device, "%s: allocation failed\n", __func__);
374b411b363SPhilipp Reisner 		return NULL;
375b411b363SPhilipp Reisner 	}
376b411b363SPhilipp Reisner 
3779104d31aSLars Ellenberg 	if (nr_pages) {
378d0164adcSMel Gorman 		page = drbd_alloc_pages(peer_device, nr_pages,
379d0164adcSMel Gorman 					gfpflags_allow_blocking(gfp_mask));
38045bb912bSLars Ellenberg 		if (!page)
38145bb912bSLars Ellenberg 			goto fail;
382a73ff323SLars Ellenberg 	}
383b411b363SPhilipp Reisner 
384c5a2c150SLars Ellenberg 	memset(peer_req, 0, sizeof(*peer_req));
385c5a2c150SLars Ellenberg 	INIT_LIST_HEAD(&peer_req->w.list);
386db830c46SAndreas Gruenbacher 	drbd_clear_interval(&peer_req->i);
3879104d31aSLars Ellenberg 	peer_req->i.size = request_size;
388db830c46SAndreas Gruenbacher 	peer_req->i.sector = sector;
389c5a2c150SLars Ellenberg 	peer_req->submit_jif = jiffies;
390a8cd15baSAndreas Gruenbacher 	peer_req->peer_device = peer_device;
391db830c46SAndreas Gruenbacher 	peer_req->pages = page;
3929a8e7753SAndreas Gruenbacher 	/*
3939a8e7753SAndreas Gruenbacher 	 * The block_id is opaque to the receiver.  It is not endianness
3949a8e7753SAndreas Gruenbacher 	 * converted, and sent back to the sender unchanged.
3959a8e7753SAndreas Gruenbacher 	 */
396db830c46SAndreas Gruenbacher 	peer_req->block_id = id;
397b411b363SPhilipp Reisner 
398db830c46SAndreas Gruenbacher 	return peer_req;
399b411b363SPhilipp Reisner 
40045bb912bSLars Ellenberg  fail:
4010892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
402b411b363SPhilipp Reisner 	return NULL;
403b411b363SPhilipp Reisner }
404b411b363SPhilipp Reisner 
/* Free a peer request: its digest (if any), its page chain (returned to
 * the pool or the system), and the request itself back to the mempool.
 * May sleep; must not be called with interrupts disabled. */
405b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
406f6ffca9fSAndreas Gruenbacher 		       int is_net)
407b411b363SPhilipp Reisner {
40821ae5d7fSLars Ellenberg 	might_sleep();
409db830c46SAndreas Gruenbacher 	if (peer_req->flags & EE_HAS_DIGEST)
410db830c46SAndreas Gruenbacher 		kfree(peer_req->digest);
411b30ab791SAndreas Gruenbacher 	drbd_free_pages(device, peer_req->pages, is_net);
4120b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
4130b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* belated activity log completion, if the flag is unexpectedly still set */
41421ae5d7fSLars Ellenberg 	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
41521ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
41621ae5d7fSLars Ellenberg 		drbd_al_complete_io(device, &peer_req->i);
41721ae5d7fSLars Ellenberg 	}
4180892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
419b411b363SPhilipp Reisner }
420b411b363SPhilipp Reisner 
/* Splice @list off under the req_lock and free every entry outside the
 * lock; returns the number of requests freed.  Whether the "net"
 * accounting is used is derived from @list being device->net_ee. */
421b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
422b411b363SPhilipp Reisner {
423b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
424db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
425b411b363SPhilipp Reisner 	int count = 0;
426b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
427b411b363SPhilipp Reisner 
4280500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
429b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4300500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
431b411b363SPhilipp Reisner 
432a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
433b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
434b411b363SPhilipp Reisner 		count++;
435b411b363SPhilipp Reisner 	}
436b411b363SPhilipp Reisner 	return count;
437b411b363SPhilipp Reisner }
438b411b363SPhilipp Reisner 
439b411b363SPhilipp Reisner /*
440a990be46SAndreas Gruenbacher  * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
441b411b363SPhilipp Reisner  */
/* Free reclaimable net_ee entries, then invoke the completion callback of
 * every request on done_ee.  Returns 0, or the first error a callback
 * returned. */
442b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device)
443b411b363SPhilipp Reisner {
444b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
445b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
446db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
447e2b3032bSAndreas Gruenbacher 	int err = 0;
448b411b363SPhilipp Reisner 
4490500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
450b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
451b30ab791SAndreas Gruenbacher 	list_splice_init(&device->done_ee, &work_list);
4520500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
453b411b363SPhilipp Reisner 
454a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
455b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
456b411b363SPhilipp Reisner 
457b411b363SPhilipp Reisner 	/* possible callbacks here:
458d4dabbe2SLars Ellenberg 	 * e_end_block, and e_end_resync_block, e_send_superseded.
459b411b363SPhilipp Reisner 	 * all ignore the last argument.
460b411b363SPhilipp Reisner 	 */
461a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
462e2b3032bSAndreas Gruenbacher 		int err2;
463e2b3032bSAndreas Gruenbacher 
464b411b363SPhilipp Reisner 		/* list_del not necessary, next/prev members not touched */
		/* run the completion callback; remember only the first error */
465a8cd15baSAndreas Gruenbacher 		err2 = peer_req->w.cb(&peer_req->w, !!err);
466e2b3032bSAndreas Gruenbacher 		if (!err)
467e2b3032bSAndreas Gruenbacher 			err = err2;
468b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
469b411b363SPhilipp Reisner 	}
470b30ab791SAndreas Gruenbacher 	wake_up(&device->ee_wait);
471b411b363SPhilipp Reisner 
472e2b3032bSAndreas Gruenbacher 	return err;
473b411b363SPhilipp Reisner }
474b411b363SPhilipp Reisner 
/* Wait (uninterruptibly) until @head is empty.  Called with the req_lock
 * held; the lock is dropped and re-acquired around io_schedule(). */
475b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device,
476d4da1537SAndreas Gruenbacher 				     struct list_head *head)
477b411b363SPhilipp Reisner {
478b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
479b411b363SPhilipp Reisner 
480b411b363SPhilipp Reisner 	/* avoids spin_lock/unlock
481b411b363SPhilipp Reisner 	 * and calling prepare_to_wait in the fast path */
482b411b363SPhilipp Reisner 	while (!list_empty(head)) {
483b30ab791SAndreas Gruenbacher 		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
4840500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
4857eaceaccSJens Axboe 		io_schedule();
486b30ab791SAndreas Gruenbacher 		finish_wait(&device->ee_wait, &wait);
4870500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
488b411b363SPhilipp Reisner 	}
489b411b363SPhilipp Reisner }
490b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes the req_lock itself. */
491b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device,
492d4da1537SAndreas Gruenbacher 				    struct list_head *head)
493b411b363SPhilipp Reisner {
4940500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
495b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, head);
4960500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
497b411b363SPhilipp Reisner }
498b411b363SPhilipp Reisner 
/* Receive up to @size bytes from @sock into @buf.  With @flags == 0, the
 * default MSG_WAITALL | MSG_NOSIGNAL is used.  Returns the number of
 * bytes received, or a negative error code from sock_recvmsg(). */
499dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
500b411b363SPhilipp Reisner {
501b411b363SPhilipp Reisner 	struct kvec iov = {
502b411b363SPhilipp Reisner 		.iov_base = buf,
503b411b363SPhilipp Reisner 		.iov_len = size,
504b411b363SPhilipp Reisner 	};
505b411b363SPhilipp Reisner 	struct msghdr msg = {
506b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
507b411b363SPhilipp Reisner 	};
508aa563d7bSDavid Howells 	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
509f7765c36SAl Viro 	return sock_recvmsg(sock, &msg, msg.msg_flags);
510b411b363SPhilipp Reisner }
511b411b363SPhilipp Reisner 
/* Receive @size bytes on the connection's data socket, logging errors and
 * forcing the connection to C_BROKEN_PIPE on anything short of a full
 * read.  Returns drbd_recv_short()'s result (bytes received or negative
 * error). */
512bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
513b411b363SPhilipp Reisner {
514b411b363SPhilipp Reisner 	int rv;
515b411b363SPhilipp Reisner 
516bde89a9eSAndreas Gruenbacher 	rv = drbd_recv_short(connection->data.socket, buf, size, 0);
517b411b363SPhilipp Reisner 
518b411b363SPhilipp Reisner 	if (rv < 0) {
519b411b363SPhilipp Reisner 		if (rv == -ECONNRESET)
5201ec861ebSAndreas Gruenbacher 			drbd_info(connection, "sock was reset by peer\n");
521b411b363SPhilipp Reisner 		else if (rv != -ERESTARTSYS)
5221ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {	/* rv == 0: peer performed an orderly shutdown */
523b411b363SPhilipp Reisner 		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			/* We initiated the disconnect ourselves: give the state
			 * machine up to ping_timeo to leave C_WF_REPORT_PARAMS
			 * (or above) before logging the shutdown. */
524bde89a9eSAndreas Gruenbacher 			long t;
525b66623e3SPhilipp Reisner 			rcu_read_lock();
526b66623e3SPhilipp Reisner 			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
527bde89a9eSAndreas Gruenbacher 			rcu_read_unlock();
528b66623e3SPhilipp Reisner 
529b66623e3SPhilipp Reisner 			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
530bde89a9eSAndreas Gruenbacher 
531b66623e3SPhilipp Reisner 			if (t)
532599377acSPhilipp Reisner 				goto out;
533599377acSPhilipp Reisner 		}
534599377acSPhilipp Reisner 		drbd_info(connection, "sock was shut down by peer\n");
5351ec861ebSAndreas Gruenbacher 	}
536599377acSPhilipp Reisner 
537599377acSPhilipp Reisner 	/* anything short of a full read means the link is broken */
538b411b363SPhilipp Reisner 	if (rv != size)
539bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
540b411b363SPhilipp Reisner 
541599377acSPhilipp Reisner out:
542b411b363SPhilipp Reisner 	return rv;
543b411b363SPhilipp Reisner }
544b411b363SPhilipp Reisner 
/* Receive exactly @size bytes.  Returns 0 on success, -EIO on a short
 * read, or the negative error code from drbd_recv(). */
545bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
546c6967746SAndreas Gruenbacher {
547c6967746SAndreas Gruenbacher 	int err;
548c6967746SAndreas Gruenbacher 
549bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
550c6967746SAndreas Gruenbacher 	if (err != size) {
551c6967746SAndreas Gruenbacher 		if (err >= 0)
552c6967746SAndreas Gruenbacher 			err = -EIO;
553c6967746SAndreas Gruenbacher 	} else
554c6967746SAndreas Gruenbacher 		err = 0;
555c6967746SAndreas Gruenbacher 	return err;
556c6967746SAndreas Gruenbacher }
557c6967746SAndreas Gruenbacher 
/* drbd_recv_all() plus a warning on failures not caused by a pending
 * signal. */
558bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
559a5c31904SAndreas Gruenbacher {
560a5c31904SAndreas Gruenbacher 	int err;
561a5c31904SAndreas Gruenbacher 
562bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
563a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5641ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
565a5c31904SAndreas Gruenbacher 	return err;
566a5c31904SAndreas Gruenbacher }
567a5c31904SAndreas Gruenbacher 
5685dbf1673SLars Ellenberg /* quoting tcp(7):
5695dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5705dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5715dbf1673SLars Ellenberg  * This is our wrapper to do so.
5725dbf1673SLars Ellenberg  */
/* A value of 0 leaves the respective buffer size untouched. */
5735dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5745dbf1673SLars Ellenberg 		unsigned int rcv)
5755dbf1673SLars Ellenberg {
5765dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5775dbf1673SLars Ellenberg 	if (snd) {
5785dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
		/* pin the value against kernel buffer auto-tuning */
5795dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5805dbf1673SLars Ellenberg 	}
5815dbf1673SLars Ellenberg 	if (rcv) {
5825dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5835dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5845dbf1673SLars Ellenberg 	}
5855dbf1673SLars Ellenberg }
5865dbf1673SLars Ellenberg 
/* Actively try to connect to the peer (the "client" side of the
 * cross-connect handshake).
 *
 * Returns the connected socket on success, NULL on failure.  A failure
 * that merely means "peer not reachable yet" does NOT change the
 * connection state; any other failure forces C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	/* sockaddr_in6 is large enough to also hold an IPv4 sockaddr_in */
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the tunables we need under RCU; net_conf may be
	 * replaced or go away concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* Clear the source port so the kernel picks a free ephemeral one. */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	/* "what" names the failing step for the error message in out: */
	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* Bound connect/handshake time; buffer sizes must be set before
	 * connect(2) to take effect, see drbd_setbufsize(). */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
674b411b363SPhilipp Reisner 
/* Per-connection-attempt context shared between the listen socket's
 * sk_state_change callback and drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed on incoming TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk); /* callback to restore */

};
6827a426fd8SPhilipp Reisner 
/* sk_state_change callback installed on the listen socket: ring the
 * door bell as soon as an incoming connection reaches TCP_ESTABLISHED,
 * then chain to the original callback. */
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	/* Read the original callback BEFORE complete(): once the waiter
	 * is woken it may proceed and tear down *ad.  NOTE(review): this
	 * ordering looks deliberate -- keep it. */
	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}
6937a426fd8SPhilipp Reisner 
/* Create, configure and start the listening socket for incoming peer
 * connections, and hook its state-change callback up to @ad so that
 * drbd_wait_for_connect() gets woken on incoming connections.
 *
 * Returns 0 on success (ad->s_listen set), -EIO on failure; failures
 * other than EAGAIN/EINTR/ERESTARTSYS also force C_DISCONNECTING.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot buffer-size tunables under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	/* "what" names the failing step for the error message below. */
	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	/* Buffer sizes must be set before listen(2), see drbd_setbufsize(). */
	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Install our state-change callback under the callback lock;
	 * undone later by unregister_state_change(). */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7561f3e509bSPhilipp Reisner 
/* Restore the socket's original sk_state_change callback and drop our
 * sk_user_data reference, under the socket callback lock.  Counterpart
 * of the installation done in prepare_listen_socket(). */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
764715306f6SAndreas Gruenbacher 
/* Wait (with timeout) for an incoming connection on the listen socket
 * prepared in @ad, and accept it.
 *
 * Returns the accepted socket, or NULL on timeout/signal/error.
 * Accept failures other than EAGAIN/EINTR/ERESTARTSYS force
 * C_DISCONNECTING.
 */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	/* Snapshot connect_int under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* The door bell is rung by drbd_incoming_connection() when an
	 * incoming connection reaches TCP_ESTABLISHED. */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* The accepted socket inherited our callback via the listen
	 * socket; put the original one back. */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
801b411b363SPhilipp Reisner 
802bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
803b411b363SPhilipp Reisner 
804bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8059f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8069f5bdc33SAndreas Gruenbacher {
807bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8089f5bdc33SAndreas Gruenbacher 		return -EIO;
809bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
810b411b363SPhilipp Reisner }
811b411b363SPhilipp Reisner 
/* Read and decode the first packet on a freshly accepted socket, to
 * learn which role (data or meta) the peer intends for it.
 *
 * Returns the packet command (pi.cmd, e.g. P_INITIAL_DATA or
 * P_INITIAL_META) on success, or a negative error code.
 */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	/* Bound the wait for the first packet by 4x ping_timeo
	 * (ping_timeo is in units of 0.1 s, hence the /10). */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		/* map a short read to -EIO, keep real errors as-is */
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
839b411b363SPhilipp Reisner 
840b411b363SPhilipp Reisner /**
841b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
842b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
843b411b363SPhilipp Reisner  */
8445d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
845b411b363SPhilipp Reisner {
846b411b363SPhilipp Reisner 	int rr;
847b411b363SPhilipp Reisner 	char tb[4];
848b411b363SPhilipp Reisner 
849b411b363SPhilipp Reisner 	if (!*sock)
85081e84650SAndreas Gruenbacher 		return false;
851b411b363SPhilipp Reisner 
852dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
853b411b363SPhilipp Reisner 
854b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85581e84650SAndreas Gruenbacher 		return true;
856b411b363SPhilipp Reisner 	} else {
857b411b363SPhilipp Reisner 		sock_release(*sock);
858b411b363SPhilipp Reisner 		*sock = NULL;
85981e84650SAndreas Gruenbacher 		return false;
860b411b363SPhilipp Reisner 	}
861b411b363SPhilipp Reisner }
8625d0b17f1SPhilipp Reisner 
/* Check whether both handshake sockets are present and still alive.
 * If both exist, sleep briefly (sock_check_timeo, falling back to
 * ping_timeo; units of 0.1 s) to give a crossed "first packet" from
 * the peer a chance to arrive before we commit to this socket pair. */
static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	/* NOTE(review): nc is used without a NULL check here, unlike the
	 * other net_conf users in this file -- presumably the config
	 * cannot vanish while a connect attempt is in flight; confirm. */
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	/* Both sockets must still pass the liveness check. */
	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}
8855d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.
   Sends the initial per-device handshake packets (sync param, sizes,
   uuids, current state) and resets per-device connection state.
   Returns 0 on success or the first error from the send helpers. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Protocol < 100 serializes state changes per connection rather
	 * than per device. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* Each send is skipped once a previous one has failed. */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
913b411b363SPhilipp Reisner 
/*
 * Establish both sockets (data + meta) to the peer, symmetrically:
 * we connect actively AND accept passively until one socket pair is
 * agreed on, then run the feature/auth/protocol handshake.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* Local bookkeeping for the two sockets being negotiated;
	 * only published into connection->data/meta once both are up. */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Negotiation loop: alternate between connecting out and
	 * accepting in, until we hold a live (sock, msock) pair. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			/* First outgoing socket becomes data, second meta. */
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* Also accept incoming connections from the peer; its first
		 * packet tells us which role the socket is meant for. */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				/* "crossed" packets: both sides connected at
				 * once; drop ours, keep the accepted one, and
				 * randomize to break the symmetry. */
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* Both sockets up: the listen socket is no longer needed. */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	/* Publish the negotiated sockets on the connection. */
	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* Per-device handshake; the kref + rcu dance lets us drop the
	 * RCU read lock around the (sleeping) drbd_connected() call. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1153b411b363SPhilipp Reisner 
1154bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1155b411b363SPhilipp Reisner {
1156bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1157b411b363SPhilipp Reisner 
11580c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11590c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11600c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11610c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11621ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11630c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116402918be2SPhilipp Reisner 		}
11650c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11660c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11670c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11680c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1169e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1170e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1171e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1172b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1173eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1174e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1175e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1176e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1177e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1178e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
117977351055SPhilipp Reisner 		pi->vnr = 0;
118002918be2SPhilipp Reisner 	} else {
11811ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1182e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1183bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11848172f3e9SAndreas Gruenbacher 		return -EINVAL;
1185b411b363SPhilipp Reisner 	}
1186e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11878172f3e9SAndreas Gruenbacher 	return 0;
1188b411b363SPhilipp Reisner }
1189b411b363SPhilipp Reisner 
1190c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1191c51a0ef3SLars Ellenberg {
1192c51a0ef3SLars Ellenberg 	if (current->plug == &connection->receiver_plug) {
1193c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
1194c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
1195c51a0ef3SLars Ellenberg 	} /* else: maybe just schedule() ?? */
1196c51a0ef3SLars Ellenberg }
1197c51a0ef3SLars Ellenberg 
1198bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1199257d0af6SPhilipp Reisner {
1200bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120169bc7bc3SAndreas Gruenbacher 	int err;
1202257d0af6SPhilipp Reisner 
1203bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1204a5c31904SAndreas Gruenbacher 	if (err)
120569bc7bc3SAndreas Gruenbacher 		return err;
1206257d0af6SPhilipp Reisner 
1207bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1208bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1209b411b363SPhilipp Reisner 
121069bc7bc3SAndreas Gruenbacher 	return err;
1211b411b363SPhilipp Reisner }
1212b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first try a non-blocking receive.
 * If the socket currently has nothing (or only part of a header) buffered,
 * use that idle moment to unplug the backend queues and ask the peer's TCP
 * for quick acks, then block for the remainder of the header. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	/* Non-blocking attempt: returns bytes received, or -EAGAIN when the
	 * receive buffer is empty right now. */
	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* Partial header: account for what we already have, then
		 * block for the rest. */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;		/* flush bios in flight; +1 held by the submitter */
	int error;			/* last errno recorded by any completion, 0 if none */
	struct completion done;		/* completed once pending drops to zero */
};
/* Per-bio private data: links one flush bio back to its device and to the
 * shared issue_flush_context it reports into. */
struct one_flush_context {
	struct drbd_device *device;
	struct issue_flush_context *ctx;
};
1256f9ff0da5SLars Ellenberg 
12571ffa7bfaSBaoyou Xie static void one_flush_endio(struct bio *bio)
1258f9ff0da5SLars Ellenberg {
1259f9ff0da5SLars Ellenberg 	struct one_flush_context *octx = bio->bi_private;
1260f9ff0da5SLars Ellenberg 	struct drbd_device *device = octx->device;
1261f9ff0da5SLars Ellenberg 	struct issue_flush_context *ctx = octx->ctx;
1262f9ff0da5SLars Ellenberg 
12634e4cbee9SChristoph Hellwig 	if (bio->bi_status) {
12644e4cbee9SChristoph Hellwig 		ctx->error = blk_status_to_errno(bio->bi_status);
12654e4cbee9SChristoph Hellwig 		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1266f9ff0da5SLars Ellenberg 	}
1267f9ff0da5SLars Ellenberg 	kfree(octx);
1268f9ff0da5SLars Ellenberg 	bio_put(bio);
1269f9ff0da5SLars Ellenberg 
1270f9ff0da5SLars Ellenberg 	clear_bit(FLUSH_PENDING, &device->flags);
1271f9ff0da5SLars Ellenberg 	put_ldev(device);
1272f9ff0da5SLars Ellenberg 	kref_put(&device->kref, drbd_destroy_device);
1273f9ff0da5SLars Ellenberg 
1274f9ff0da5SLars Ellenberg 	if (atomic_dec_and_test(&ctx->pending))
1275f9ff0da5SLars Ellenberg 		complete(&ctx->done);
1276f9ff0da5SLars Ellenberg }
1277f9ff0da5SLars Ellenberg 
1278f9ff0da5SLars Ellenberg static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
1279f9ff0da5SLars Ellenberg {
1280f9ff0da5SLars Ellenberg 	struct bio *bio = bio_alloc(GFP_NOIO, 0);
1281f9ff0da5SLars Ellenberg 	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
1282f9ff0da5SLars Ellenberg 	if (!bio || !octx) {
1283f9ff0da5SLars Ellenberg 		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
1284f9ff0da5SLars Ellenberg 		/* FIXME: what else can I do now?  disconnecting or detaching
1285f9ff0da5SLars Ellenberg 		 * really does not help to improve the state of the world, either.
1286f9ff0da5SLars Ellenberg 		 */
1287f9ff0da5SLars Ellenberg 		kfree(octx);
1288f9ff0da5SLars Ellenberg 		if (bio)
1289f9ff0da5SLars Ellenberg 			bio_put(bio);
1290f9ff0da5SLars Ellenberg 
1291f9ff0da5SLars Ellenberg 		ctx->error = -ENOMEM;
1292f9ff0da5SLars Ellenberg 		put_ldev(device);
1293f9ff0da5SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1294f9ff0da5SLars Ellenberg 		return;
1295f9ff0da5SLars Ellenberg 	}
1296f9ff0da5SLars Ellenberg 
1297f9ff0da5SLars Ellenberg 	octx->device = device;
1298f9ff0da5SLars Ellenberg 	octx->ctx = ctx;
129974d46992SChristoph Hellwig 	bio_set_dev(bio, device->ldev->backing_bdev);
1300f9ff0da5SLars Ellenberg 	bio->bi_private = octx;
1301f9ff0da5SLars Ellenberg 	bio->bi_end_io = one_flush_endio;
130270fd7614SChristoph Hellwig 	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
1303f9ff0da5SLars Ellenberg 
1304f9ff0da5SLars Ellenberg 	device->flush_jif = jiffies;
1305f9ff0da5SLars Ellenberg 	set_bit(FLUSH_PENDING, &device->flags);
1306f9ff0da5SLars Ellenberg 	atomic_inc(&ctx->pending);
1307f9ff0da5SLars Ellenberg 	submit_bio(bio);
1308f9ff0da5SLars Ellenberg }
1309f9ff0da5SLars Ellenberg 
/* Flush all attached backing devices of this connection in parallel and
 * wait for the completions.  Only relevant while the resource's write
 * ordering is WO_BDEV_FLUSH; any flush error demotes it to WO_DRAIN_IO. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* Start pending at 1 so the context cannot complete while we
		 * are still submitting; the extra count is dropped below. */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* submit_one_flush() may sleep (GFP_NOIO allocation),
			 * so drop the RCU read lock around it; the kref and
			 * ldev references keep the device alive meanwhile. */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1350b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * Walks the epoch list starting at @epoch: an epoch that is non-empty, has
 * no active requests left and has received its barrier number (or is being
 * cleaned up) gets acknowledged to the peer and is either freed or, if it
 * is still the connection's current epoch, reset for reuse.
 *
 * Returns FE_STILL_LIVE, FE_RECYCLED or FE_DESTROYED.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* Apply the event; EV_CLEANUP is a modifier flag, not an event. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while talking to the peer */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* remember the successor before freeing this epoch */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* current epoch: reset in place for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1426b411b363SPhilipp Reisner 
14278fe39aacSPhilipp Reisner static enum write_ordering_e
14288fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14298fe39aacSPhilipp Reisner {
14308fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14318fe39aacSPhilipp Reisner 
14328fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14338fe39aacSPhilipp Reisner 
1434f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1435f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14388fe39aacSPhilipp Reisner 
14398fe39aacSPhilipp Reisner 	return wo;
14408fe39aacSPhilipp Reisner }
14418fe39aacSPhilipp Reisner 
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	backing device to consider even if it is not (yet) in the
 *		resource's device list; may be NULL.
 * @wo:		Write ordering method to try.
 *
 * The effective method is additionally limited by what each attached
 * backing device's configuration allows (see max_allowed_wo()).
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* only ever "bump" downwards, except when (re)trying flush */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already covered by the loop; don't apply it twice */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1481b411b363SPhilipp Reisner 
/*
 * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
 * will directly go to fallback mode, submitting normal writes, and
 * never even try to UNMAP.
 *
 * And dm-thin does not do this (yet), mostly because in general it has
 * to assume that "skip_block_zeroing" is set.  See also:
 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
 *
 * We *may* ignore the discard-zeroes-data setting, if so configured.
 *
 * Assumption is that this "discard_zeroes_data=0" is only because the backend
 * may ignore partial unaligned discards.
 *
 * LVM/DM thin as of at least
 *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
 *   Library version: 1.02.93-RHEL7 (2015-01-28)
 *   Driver version:  4.29.0
 * still behaves this way.
 *
 * For unaligned (wrt. alignment and granularity) or too small discards,
 * we zero-out the initial (and/or) trailing unaligned partial chunks,
 * but discard all the aligned full chunks.
 *
 * At least for LVM/DM thin, with skip_block_zeroing=false,
 * the result is effectively "discard_zeroes_data=1".
 */
/* flags: EE_TRIM|EE_ZEROOUT */
/* Returns 0 if all sub-requests succeeded, 1 if any of them failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* Zero-out requested explicitly, or discard not requested at all:
	 * skip the discard logic entirely. */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap a single discard at 2 GiB worth of sectors */
	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		/* misaligned head: zero out up to the next alignment boundary */
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* discard the aligned full chunks */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* whatever is left (misaligned tail, or the whole range) gets zeroed */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1576f31e583aSLars Ellenberg 
1577f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1578f31e583aSLars Ellenberg {
1579f31e583aSLars Ellenberg 	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1580f31e583aSLars Ellenberg 	struct disk_conf *dc;
1581f31e583aSLars Ellenberg 	bool can_do;
1582f31e583aSLars Ellenberg 
1583f31e583aSLars Ellenberg 	if (!blk_queue_discard(q))
1584f31e583aSLars Ellenberg 		return false;
1585f31e583aSLars Ellenberg 
1586f31e583aSLars Ellenberg 	rcu_read_lock();
1587f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1588f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1589f31e583aSLars Ellenberg 	rcu_read_unlock();
1590f31e583aSLars Ellenberg 	return can_do;
1591f31e583aSLars Ellenberg }
1592f31e583aSLars Ellenberg 
1593f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1594f31e583aSLars Ellenberg {
1595f31e583aSLars Ellenberg 	/* If the backend cannot discard, or does not guarantee
1596f31e583aSLars Ellenberg 	 * read-back zeroes in discarded ranges, we fall back to
1597f31e583aSLars Ellenberg 	 * zero-out.  Unless configuration specifically requested
1598f31e583aSLars Ellenberg 	 * otherwise. */
1599f31e583aSLars Ellenberg 	if (!can_do_reliable_discards(device))
1600f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
1601f31e583aSLars Ellenberg 
1602f31e583aSLars Ellenberg 	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1603f31e583aSLars Ellenberg 	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1604dd4f699dSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
1605dd4f699dSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
1606dd4f699dSLars Ellenberg }
1607dd4f699dSLars Ellenberg 
16089104d31aSLars Ellenberg static void drbd_issue_peer_wsame(struct drbd_device *device,
16099104d31aSLars Ellenberg 				  struct drbd_peer_request *peer_req)
16109104d31aSLars Ellenberg {
16119104d31aSLars Ellenberg 	struct block_device *bdev = device->ldev->backing_bdev;
16129104d31aSLars Ellenberg 	sector_t s = peer_req->i.sector;
16139104d31aSLars Ellenberg 	sector_t nr = peer_req->i.size >> 9;
16149104d31aSLars Ellenberg 	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
16159104d31aSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
16169104d31aSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
16179104d31aSLars Ellenberg }
16189104d31aSLars Ellenberg 
16199104d31aSLars Ellenberg 
1620b411b363SPhilipp Reisner /**
1621fbe29decSAndreas Gruenbacher  * drbd_submit_peer_request()
1622b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1623db830c46SAndreas Gruenbacher  * @peer_req:	peer request
16241eff9d32SJens Axboe  * @rw:		flag field, see bio->bi_opf
162510f6d992SLars Ellenberg  *
162610f6d992SLars Ellenberg  * May spread the pages to multiple bios,
162710f6d992SLars Ellenberg  * depending on bio_add_page restrictions.
162810f6d992SLars Ellenberg  *
162910f6d992SLars Ellenberg  * Returns 0 if all bios have been submitted,
163010f6d992SLars Ellenberg  * -ENOMEM if we could not allocate enough bios,
163110f6d992SLars Ellenberg  * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
163210f6d992SLars Ellenberg  *  single page to an empty bio (which should never happen and likely indicates
163310f6d992SLars Ellenberg  *  that the lower level IO stack is in some way broken). This has been observed
163410f6d992SLars Ellenberg  *  on certain Xen deployments.
163545bb912bSLars Ellenberg  */
163645bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
1637b30ab791SAndreas Gruenbacher int drbd_submit_peer_request(struct drbd_device *device,
1638fbe29decSAndreas Gruenbacher 			     struct drbd_peer_request *peer_req,
1639bb3cc85eSMike Christie 			     const unsigned op, const unsigned op_flags,
1640bb3cc85eSMike Christie 			     const int fault_type)
164145bb912bSLars Ellenberg {
164245bb912bSLars Ellenberg 	struct bio *bios = NULL;
164345bb912bSLars Ellenberg 	struct bio *bio;
1644db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
1645db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
164611f8b2b6SAndreas Gruenbacher 	unsigned data_size = peer_req->i.size;
164745bb912bSLars Ellenberg 	unsigned n_bios = 0;
164811f8b2b6SAndreas Gruenbacher 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
164910f6d992SLars Ellenberg 	int err = -ENOMEM;
165045bb912bSLars Ellenberg 
1651dd4f699dSLars Ellenberg 	/* TRIM/DISCARD: for now, always use the helper function
1652dd4f699dSLars Ellenberg 	 * blkdev_issue_zeroout(..., discard=true).
1653dd4f699dSLars Ellenberg 	 * It's synchronous, but it does the right thing wrt. bio splitting.
1654dd4f699dSLars Ellenberg 	 * Correctness first, performance later.  Next step is to code an
1655dd4f699dSLars Ellenberg 	 * asynchronous variant of the same.
1656dd4f699dSLars Ellenberg 	 */
1657f31e583aSLars Ellenberg 	if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
1658a0fb3c47SLars Ellenberg 		/* wait for all pending IO completions, before we start
1659a0fb3c47SLars Ellenberg 		 * zeroing things out. */
16605dd2ca19SAndreas Gruenbacher 		conn_wait_active_ee_empty(peer_req->peer_device->connection);
166145d2933cSLars Ellenberg 		/* add it to the active list now,
166245d2933cSLars Ellenberg 		 * so we can find it to present it in debugfs */
166321ae5d7fSLars Ellenberg 		peer_req->submit_jif = jiffies;
166421ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SUBMITTED;
1665700ca8c0SPhilipp Reisner 
1666700ca8c0SPhilipp Reisner 		/* If this was a resync request from receive_rs_deallocated(),
1667700ca8c0SPhilipp Reisner 		 * it is already on the sync_ee list */
1668700ca8c0SPhilipp Reisner 		if (list_empty(&peer_req->w.list)) {
166945d2933cSLars Ellenberg 			spin_lock_irq(&device->resource->req_lock);
167045d2933cSLars Ellenberg 			list_add_tail(&peer_req->w.list, &device->active_ee);
167145d2933cSLars Ellenberg 			spin_unlock_irq(&device->resource->req_lock);
1672700ca8c0SPhilipp Reisner 		}
1673700ca8c0SPhilipp Reisner 
1674f31e583aSLars Ellenberg 		if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
1675f31e583aSLars Ellenberg 			drbd_issue_peer_discard_or_zero_out(device, peer_req);
16769104d31aSLars Ellenberg 		else /* EE_WRITE_SAME */
16779104d31aSLars Ellenberg 			drbd_issue_peer_wsame(device, peer_req);
1678a0fb3c47SLars Ellenberg 		return 0;
1679a0fb3c47SLars Ellenberg 	}
1680a0fb3c47SLars Ellenberg 
168145bb912bSLars Ellenberg 	/* In most cases, we will only need one bio.  But in case the lower
168245bb912bSLars Ellenberg 	 * level restrictions happen to be different at this offset on this
168345bb912bSLars Ellenberg 	 * side than those of the sending peer, we may need to submit the
16849476f39dSLars Ellenberg 	 * request in more than one bio.
16859476f39dSLars Ellenberg 	 *
16869476f39dSLars Ellenberg 	 * Plain bio_alloc is good enough here, this is no DRBD internally
16879476f39dSLars Ellenberg 	 * generated bio, but a bio allocated on behalf of the peer.
16889476f39dSLars Ellenberg 	 */
168945bb912bSLars Ellenberg next_bio:
169045bb912bSLars Ellenberg 	bio = bio_alloc(GFP_NOIO, nr_pages);
169145bb912bSLars Ellenberg 	if (!bio) {
1692a0fb3c47SLars Ellenberg 		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
169345bb912bSLars Ellenberg 		goto fail;
169445bb912bSLars Ellenberg 	}
1695db830c46SAndreas Gruenbacher 	/* > peer_req->i.sector, unless this is the first bio */
16964f024f37SKent Overstreet 	bio->bi_iter.bi_sector = sector;
169774d46992SChristoph Hellwig 	bio_set_dev(bio, device->ldev->backing_bdev);
1698bb3cc85eSMike Christie 	bio_set_op_attrs(bio, op, op_flags);
1699db830c46SAndreas Gruenbacher 	bio->bi_private = peer_req;
1700fcefa62eSAndreas Gruenbacher 	bio->bi_end_io = drbd_peer_request_endio;
170145bb912bSLars Ellenberg 
170245bb912bSLars Ellenberg 	bio->bi_next = bios;
170345bb912bSLars Ellenberg 	bios = bio;
170445bb912bSLars Ellenberg 	++n_bios;
170545bb912bSLars Ellenberg 
170645bb912bSLars Ellenberg 	page_chain_for_each(page) {
170711f8b2b6SAndreas Gruenbacher 		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
170806efffdaSMing Lei 		if (!bio_add_page(bio, page, len, 0))
170945bb912bSLars Ellenberg 			goto next_bio;
171011f8b2b6SAndreas Gruenbacher 		data_size -= len;
171145bb912bSLars Ellenberg 		sector += len >> 9;
171245bb912bSLars Ellenberg 		--nr_pages;
171345bb912bSLars Ellenberg 	}
171411f8b2b6SAndreas Gruenbacher 	D_ASSERT(device, data_size == 0);
1715a0fb3c47SLars Ellenberg 	D_ASSERT(device, page == NULL);
171645bb912bSLars Ellenberg 
1717db830c46SAndreas Gruenbacher 	atomic_set(&peer_req->pending_bios, n_bios);
171821ae5d7fSLars Ellenberg 	/* for debugfs: update timestamp, mark as submitted */
171921ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
172021ae5d7fSLars Ellenberg 	peer_req->flags |= EE_SUBMITTED;
172145bb912bSLars Ellenberg 	do {
172245bb912bSLars Ellenberg 		bio = bios;
172345bb912bSLars Ellenberg 		bios = bios->bi_next;
172445bb912bSLars Ellenberg 		bio->bi_next = NULL;
172545bb912bSLars Ellenberg 
1726b30ab791SAndreas Gruenbacher 		drbd_generic_make_request(device, fault_type, bio);
172745bb912bSLars Ellenberg 	} while (bios);
172845bb912bSLars Ellenberg 	return 0;
172945bb912bSLars Ellenberg 
173045bb912bSLars Ellenberg fail:
173145bb912bSLars Ellenberg 	while (bios) {
173245bb912bSLars Ellenberg 		bio = bios;
173345bb912bSLars Ellenberg 		bios = bios->bi_next;
173445bb912bSLars Ellenberg 		bio_put(bio);
173545bb912bSLars Ellenberg 	}
173610f6d992SLars Ellenberg 	return err;
173745bb912bSLars Ellenberg }
173845bb912bSLars Ellenberg 
1739b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1740db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
174153840641SAndreas Gruenbacher {
1742db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
174353840641SAndreas Gruenbacher 
1744b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
174553840641SAndreas Gruenbacher 	drbd_clear_interval(i);
174653840641SAndreas Gruenbacher 
17476c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
174853840641SAndreas Gruenbacher 	if (i->waiting)
1749b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
175053840641SAndreas Gruenbacher }
175153840641SAndreas Gruenbacher 
1752bde89a9eSAndreas Gruenbacher static void conn_wait_active_ee_empty(struct drbd_connection *connection)
175377fede51SPhilipp Reisner {
1754c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
175577fede51SPhilipp Reisner 	int vnr;
175677fede51SPhilipp Reisner 
175777fede51SPhilipp Reisner 	rcu_read_lock();
1758c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1759c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
1760c06ece6bSAndreas Gruenbacher 
1761b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
176277fede51SPhilipp Reisner 		rcu_read_unlock();
1763b30ab791SAndreas Gruenbacher 		drbd_wait_ee_list_empty(device, &device->active_ee);
176405a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
176577fede51SPhilipp Reisner 		rcu_read_lock();
176677fede51SPhilipp Reisner 	}
176777fede51SPhilipp Reisner 	rcu_read_unlock();
176877fede51SPhilipp Reisner }
176977fede51SPhilipp Reisner 
1770bde89a9eSAndreas Gruenbacher static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
1771b411b363SPhilipp Reisner {
17722451fc3bSPhilipp Reisner 	int rv;
1773e658983aSAndreas Gruenbacher 	struct p_barrier *p = pi->data;
1774b411b363SPhilipp Reisner 	struct drbd_epoch *epoch;
1775b411b363SPhilipp Reisner 
17769ed57dcbSLars Ellenberg 	/* FIXME these are unacked on connection,
17779ed57dcbSLars Ellenberg 	 * not a specific (peer)device.
17789ed57dcbSLars Ellenberg 	 */
1779bde89a9eSAndreas Gruenbacher 	connection->current_epoch->barrier_nr = p->barrier;
1780bde89a9eSAndreas Gruenbacher 	connection->current_epoch->connection = connection;
1781bde89a9eSAndreas Gruenbacher 	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
1782b411b363SPhilipp Reisner 
1783b411b363SPhilipp Reisner 	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1784b411b363SPhilipp Reisner 	 * the activity log, which means it would not be resynced in case the
1785b411b363SPhilipp Reisner 	 * R_PRIMARY crashes now.
1786b411b363SPhilipp Reisner 	 * Therefore we must send the barrier_ack after the barrier request was
1787b411b363SPhilipp Reisner 	 * completed. */
1788e9526580SPhilipp Reisner 	switch (connection->resource->write_ordering) {
1789f6ba8636SAndreas Gruenbacher 	case WO_NONE:
1790b411b363SPhilipp Reisner 		if (rv == FE_RECYCLED)
179182bc0194SAndreas Gruenbacher 			return 0;
1792b411b363SPhilipp Reisner 
1793b411b363SPhilipp Reisner 		/* receiver context, in the writeout path of the other node.
1794b411b363SPhilipp Reisner 		 * avoid potential distributed deadlock */
1795b411b363SPhilipp Reisner 		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
17962451fc3bSPhilipp Reisner 		if (epoch)
17972451fc3bSPhilipp Reisner 			break;
17982451fc3bSPhilipp Reisner 		else
17991ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
18002451fc3bSPhilipp Reisner 			/* Fall through */
18012451fc3bSPhilipp Reisner 
1802f6ba8636SAndreas Gruenbacher 	case WO_BDEV_FLUSH:
1803f6ba8636SAndreas Gruenbacher 	case WO_DRAIN_IO:
1804bde89a9eSAndreas Gruenbacher 		conn_wait_active_ee_empty(connection);
1805bde89a9eSAndreas Gruenbacher 		drbd_flush(connection);
18062451fc3bSPhilipp Reisner 
1807bde89a9eSAndreas Gruenbacher 		if (atomic_read(&connection->current_epoch->epoch_size)) {
18082451fc3bSPhilipp Reisner 			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
18092451fc3bSPhilipp Reisner 			if (epoch)
18102451fc3bSPhilipp Reisner 				break;
1811b411b363SPhilipp Reisner 		}
1812b411b363SPhilipp Reisner 
181382bc0194SAndreas Gruenbacher 		return 0;
18142451fc3bSPhilipp Reisner 	default:
1815e9526580SPhilipp Reisner 		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1816e9526580SPhilipp Reisner 			 connection->resource->write_ordering);
181782bc0194SAndreas Gruenbacher 		return -EIO;
1818b411b363SPhilipp Reisner 	}
1819b411b363SPhilipp Reisner 
1820b411b363SPhilipp Reisner 	epoch->flags = 0;
1821b411b363SPhilipp Reisner 	atomic_set(&epoch->epoch_size, 0);
1822b411b363SPhilipp Reisner 	atomic_set(&epoch->active, 0);
1823b411b363SPhilipp Reisner 
1824bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
1825bde89a9eSAndreas Gruenbacher 	if (atomic_read(&connection->current_epoch->epoch_size)) {
1826bde89a9eSAndreas Gruenbacher 		list_add(&epoch->list, &connection->current_epoch->list);
1827bde89a9eSAndreas Gruenbacher 		connection->current_epoch = epoch;
1828bde89a9eSAndreas Gruenbacher 		connection->epochs++;
1829b411b363SPhilipp Reisner 	} else {
1830b411b363SPhilipp Reisner 		/* The current_epoch got recycled while we allocated this one... */
1831b411b363SPhilipp Reisner 		kfree(epoch);
1832b411b363SPhilipp Reisner 	}
1833bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
1834b411b363SPhilipp Reisner 
183582bc0194SAndreas Gruenbacher 	return 0;
1836b411b363SPhilipp Reisner }
1837b411b363SPhilipp Reisner 
18389104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
18393d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18409104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18419104d31aSLars Ellenberg 			      unsigned int payload_size)
18429104d31aSLars Ellenberg {
18439104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18449104d31aSLars Ellenberg 	r->i.size = payload_size;
18459104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18469104d31aSLars Ellenberg 	r->i.size = tmp;
18479104d31aSLars Ellenberg }
18489104d31aSLars Ellenberg 
1849b411b363SPhilipp Reisner /* used from receive_RSDataReply (recv_resync_read)
18509104d31aSLars Ellenberg  * and from receive_Data.
18519104d31aSLars Ellenberg  * data_size: actual payload ("data in")
18529104d31aSLars Ellenberg  * 	for normal writes that is bi_size.
18539104d31aSLars Ellenberg  * 	for discards, that is zero.
18549104d31aSLars Ellenberg  * 	for write same, it is logical_block_size.
18559104d31aSLars Ellenberg  * both trim and write same have the bi_size ("data len to be affected")
18569104d31aSLars Ellenberg  * as extra argument in the packet header.
18579104d31aSLars Ellenberg  */
1858f6ffca9fSAndreas Gruenbacher static struct drbd_peer_request *
185969a22773SAndreas Gruenbacher read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1860a0fb3c47SLars Ellenberg 	      struct packet_info *pi) __must_hold(local)
1861b411b363SPhilipp Reisner {
186269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1863b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
1864db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
1865b411b363SPhilipp Reisner 	struct page *page;
186611f8b2b6SAndreas Gruenbacher 	int digest_size, err;
186711f8b2b6SAndreas Gruenbacher 	unsigned int data_size = pi->size, ds;
186869a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
186969a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
18706b4388acSPhilipp Reisner 	unsigned long *data;
1871a0fb3c47SLars Ellenberg 	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1872f31e583aSLars Ellenberg 	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
18739104d31aSLars Ellenberg 	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
1874b411b363SPhilipp Reisner 
187511f8b2b6SAndreas Gruenbacher 	digest_size = 0;
1876a0fb3c47SLars Ellenberg 	if (!trim && peer_device->connection->peer_integrity_tfm) {
18773d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
18789f5bdc33SAndreas Gruenbacher 		/*
18799f5bdc33SAndreas Gruenbacher 		 * FIXME: Receive the incoming digest into the receive buffer
18809f5bdc33SAndreas Gruenbacher 		 *	  here, together with its struct p_data?
18819f5bdc33SAndreas Gruenbacher 		 */
188211f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1883a5c31904SAndreas Gruenbacher 		if (err)
1884b411b363SPhilipp Reisner 			return NULL;
188511f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
188688104ca4SAndreas Gruenbacher 	}
1887b411b363SPhilipp Reisner 
18889104d31aSLars Ellenberg 	/* assume request_size == data_size, but special case trim and wsame. */
18899104d31aSLars Ellenberg 	ds = data_size;
1890a0fb3c47SLars Ellenberg 	if (trim) {
18919104d31aSLars Ellenberg 		if (!expect(data_size == 0))
18929104d31aSLars Ellenberg 			return NULL;
18939104d31aSLars Ellenberg 		ds = be32_to_cpu(trim->size);
1894f31e583aSLars Ellenberg 	} else if (zeroes) {
1895f31e583aSLars Ellenberg 		if (!expect(data_size == 0))
1896f31e583aSLars Ellenberg 			return NULL;
1897f31e583aSLars Ellenberg 		ds = be32_to_cpu(zeroes->size);
18989104d31aSLars Ellenberg 	} else if (wsame) {
18999104d31aSLars Ellenberg 		if (data_size != queue_logical_block_size(device->rq_queue)) {
19009104d31aSLars Ellenberg 			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
19019104d31aSLars Ellenberg 				data_size, queue_logical_block_size(device->rq_queue));
19029104d31aSLars Ellenberg 			return NULL;
19039104d31aSLars Ellenberg 		}
19049104d31aSLars Ellenberg 		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
19059104d31aSLars Ellenberg 			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
19069104d31aSLars Ellenberg 				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
19079104d31aSLars Ellenberg 			return NULL;
19089104d31aSLars Ellenberg 		}
19099104d31aSLars Ellenberg 		ds = be32_to_cpu(wsame->size);
1910a0fb3c47SLars Ellenberg 	}
1911a0fb3c47SLars Ellenberg 
19129104d31aSLars Ellenberg 	if (!expect(IS_ALIGNED(ds, 512)))
1913841ce241SAndreas Gruenbacher 		return NULL;
1914f31e583aSLars Ellenberg 	if (trim || wsame || zeroes) {
19159104d31aSLars Ellenberg 		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
19169104d31aSLars Ellenberg 			return NULL;
19179104d31aSLars Ellenberg 	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
1918841ce241SAndreas Gruenbacher 		return NULL;
1919b411b363SPhilipp Reisner 
19206666032aSLars Ellenberg 	/* even though we trust out peer,
19216666032aSLars Ellenberg 	 * we sometimes have to double check. */
19229104d31aSLars Ellenberg 	if (sector + (ds>>9) > capacity) {
1923d0180171SAndreas Gruenbacher 		drbd_err(device, "request from peer beyond end of local disk: "
1924fdda6544SLars Ellenberg 			"capacity: %llus < sector: %llus + size: %u\n",
19256666032aSLars Ellenberg 			(unsigned long long)capacity,
19269104d31aSLars Ellenberg 			(unsigned long long)sector, ds);
19276666032aSLars Ellenberg 		return NULL;
19286666032aSLars Ellenberg 	}
19296666032aSLars Ellenberg 
1930b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1931b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
1932b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
19339104d31aSLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
1934db830c46SAndreas Gruenbacher 	if (!peer_req)
1935b411b363SPhilipp Reisner 		return NULL;
193645bb912bSLars Ellenberg 
193721ae5d7fSLars Ellenberg 	peer_req->flags |= EE_WRITE;
19389104d31aSLars Ellenberg 	if (trim) {
1939f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
1940f31e583aSLars Ellenberg 		return peer_req;
1941f31e583aSLars Ellenberg 	}
1942f31e583aSLars Ellenberg 	if (zeroes) {
1943f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
194481a3537aSLars Ellenberg 		return peer_req;
19459104d31aSLars Ellenberg 	}
19469104d31aSLars Ellenberg 	if (wsame)
19479104d31aSLars Ellenberg 		peer_req->flags |= EE_WRITE_SAME;
1948a73ff323SLars Ellenberg 
19499104d31aSLars Ellenberg 	/* receive payload size bytes into page chain */
1950b411b363SPhilipp Reisner 	ds = data_size;
1951db830c46SAndreas Gruenbacher 	page = peer_req->pages;
195245bb912bSLars Ellenberg 	page_chain_for_each(page) {
195345bb912bSLars Ellenberg 		unsigned len = min_t(int, ds, PAGE_SIZE);
19546b4388acSPhilipp Reisner 		data = kmap(page);
195569a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1956b30ab791SAndreas Gruenbacher 		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
1957d0180171SAndreas Gruenbacher 			drbd_err(device, "Fault injection: Corrupting data on receive\n");
19586b4388acSPhilipp Reisner 			data[0] = data[0] ^ (unsigned long)-1;
19596b4388acSPhilipp Reisner 		}
1960b411b363SPhilipp Reisner 		kunmap(page);
1961a5c31904SAndreas Gruenbacher 		if (err) {
1962b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1963b411b363SPhilipp Reisner 			return NULL;
1964b411b363SPhilipp Reisner 		}
1965a5c31904SAndreas Gruenbacher 		ds -= len;
1966b411b363SPhilipp Reisner 	}
1967b411b363SPhilipp Reisner 
196811f8b2b6SAndreas Gruenbacher 	if (digest_size) {
19699104d31aSLars Ellenberg 		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
197011f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
1971d0180171SAndreas Gruenbacher 			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
1972470be44aSLars Ellenberg 				(unsigned long long)sector, data_size);
1973b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1974b411b363SPhilipp Reisner 			return NULL;
1975b411b363SPhilipp Reisner 		}
1976b411b363SPhilipp Reisner 	}
1977b30ab791SAndreas Gruenbacher 	device->recv_cnt += data_size >> 9;
1978db830c46SAndreas Gruenbacher 	return peer_req;
1979b411b363SPhilipp Reisner }
1980b411b363SPhilipp Reisner 
1981b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1982b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1983b411b363SPhilipp Reisner  */
198469a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1985b411b363SPhilipp Reisner {
1986b411b363SPhilipp Reisner 	struct page *page;
1987a5c31904SAndreas Gruenbacher 	int err = 0;
1988b411b363SPhilipp Reisner 	void *data;
1989b411b363SPhilipp Reisner 
1990c3470cdeSLars Ellenberg 	if (!data_size)
1991fc5be839SAndreas Gruenbacher 		return 0;
1992c3470cdeSLars Ellenberg 
199369a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1994b411b363SPhilipp Reisner 
1995b411b363SPhilipp Reisner 	data = kmap(page);
1996b411b363SPhilipp Reisner 	while (data_size) {
1997fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1998fc5be839SAndreas Gruenbacher 
199969a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
2000a5c31904SAndreas Gruenbacher 		if (err)
2001b411b363SPhilipp Reisner 			break;
2002a5c31904SAndreas Gruenbacher 		data_size -= len;
2003b411b363SPhilipp Reisner 	}
2004b411b363SPhilipp Reisner 	kunmap(page);
200569a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
2006fc5be839SAndreas Gruenbacher 	return err;
2007b411b363SPhilipp Reisner }
2008b411b363SPhilipp Reisner 
200969a22773SAndreas Gruenbacher static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
2010b411b363SPhilipp Reisner 			   sector_t sector, int data_size)
2011b411b363SPhilipp Reisner {
20127988613bSKent Overstreet 	struct bio_vec bvec;
20137988613bSKent Overstreet 	struct bvec_iter iter;
2014b411b363SPhilipp Reisner 	struct bio *bio;
201511f8b2b6SAndreas Gruenbacher 	int digest_size, err, expect;
201669a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
201769a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
2018b411b363SPhilipp Reisner 
201911f8b2b6SAndreas Gruenbacher 	digest_size = 0;
202069a22773SAndreas Gruenbacher 	if (peer_device->connection->peer_integrity_tfm) {
20213d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
202211f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
2023a5c31904SAndreas Gruenbacher 		if (err)
2024a5c31904SAndreas Gruenbacher 			return err;
202511f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
202688104ca4SAndreas Gruenbacher 	}
2027b411b363SPhilipp Reisner 
2028b411b363SPhilipp Reisner 	/* optimistically update recv_cnt.  if receiving fails below,
2029b411b363SPhilipp Reisner 	 * we disconnect anyways, and counters will be reset. */
203069a22773SAndreas Gruenbacher 	peer_device->device->recv_cnt += data_size>>9;
2031b411b363SPhilipp Reisner 
2032b411b363SPhilipp Reisner 	bio = req->master_bio;
203369a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
2034b411b363SPhilipp Reisner 
20357988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
20367988613bSKent Overstreet 		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
20377988613bSKent Overstreet 		expect = min_t(int, data_size, bvec.bv_len);
203869a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
20397988613bSKent Overstreet 		kunmap(bvec.bv_page);
2040a5c31904SAndreas Gruenbacher 		if (err)
2041a5c31904SAndreas Gruenbacher 			return err;
2042a5c31904SAndreas Gruenbacher 		data_size -= expect;
2043b411b363SPhilipp Reisner 	}
2044b411b363SPhilipp Reisner 
204511f8b2b6SAndreas Gruenbacher 	if (digest_size) {
204669a22773SAndreas Gruenbacher 		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
204711f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
204869a22773SAndreas Gruenbacher 			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
204928284cefSAndreas Gruenbacher 			return -EINVAL;
2050b411b363SPhilipp Reisner 		}
2051b411b363SPhilipp Reisner 	}
2052b411b363SPhilipp Reisner 
205369a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, data_size == 0);
205428284cefSAndreas Gruenbacher 	return 0;
2055b411b363SPhilipp Reisner }
2056b411b363SPhilipp Reisner 
2057a990be46SAndreas Gruenbacher /*
2058668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
2059a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
2060a990be46SAndreas Gruenbacher  */
206199920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
2062b411b363SPhilipp Reisner {
20638050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2064a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2065a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2066a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2067db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
206899920dc5SAndreas Gruenbacher 	int err;
2069b411b363SPhilipp Reisner 
20700b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2071b411b363SPhilipp Reisner 
2072db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2073b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, peer_req->i.size);
2074a8cd15baSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
2075b411b363SPhilipp Reisner 	} else {
2076b411b363SPhilipp Reisner 		/* Record failure to sync */
2077b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, peer_req->i.size);
2078b411b363SPhilipp Reisner 
2079a8cd15baSAndreas Gruenbacher 		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2080b411b363SPhilipp Reisner 	}
2081b30ab791SAndreas Gruenbacher 	dec_unacked(device);
2082b411b363SPhilipp Reisner 
208399920dc5SAndreas Gruenbacher 	return err;
2084b411b363SPhilipp Reisner }
2085b411b363SPhilipp Reisner 
208669a22773SAndreas Gruenbacher static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
2087a0fb3c47SLars Ellenberg 			    struct packet_info *pi) __releases(local)
2088b411b363SPhilipp Reisner {
208969a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2090db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2091b411b363SPhilipp Reisner 
2092a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
2093db830c46SAndreas Gruenbacher 	if (!peer_req)
209445bb912bSLars Ellenberg 		goto fail;
2095b411b363SPhilipp Reisner 
2096b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
2097b411b363SPhilipp Reisner 
2098b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2099b411b363SPhilipp Reisner 	/* corresponding dec_unacked() in e_end_resync_block()
2100b411b363SPhilipp Reisner 	 * respective _drbd_clear_done_ee */
2101b411b363SPhilipp Reisner 
2102a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_resync_block;
210321ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
210445bb912bSLars Ellenberg 
21050500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2106b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->sync_ee);
21070500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2108b411b363SPhilipp Reisner 
2109a0fb3c47SLars Ellenberg 	atomic_add(pi->size >> 9, &device->rs_sect_ev);
2110bb3cc85eSMike Christie 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
2111bb3cc85eSMike Christie 				     DRBD_FAULT_RS_WR) == 0)
2112e1c1b0fcSAndreas Gruenbacher 		return 0;
211345bb912bSLars Ellenberg 
211410f6d992SLars Ellenberg 	/* don't care for the reason here */
2115d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
21160500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2117a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
21180500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
211922cc37a9SLars Ellenberg 
2120b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
212145bb912bSLars Ellenberg fail:
2122b30ab791SAndreas Gruenbacher 	put_ldev(device);
2123e1c1b0fcSAndreas Gruenbacher 	return -EIO;
2124b411b363SPhilipp Reisner }
2125b411b363SPhilipp Reisner 
2126668eebc6SAndreas Gruenbacher static struct drbd_request *
2127b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2128bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2129b411b363SPhilipp Reisner {
2130b411b363SPhilipp Reisner 	struct drbd_request *req;
2131668eebc6SAndreas Gruenbacher 
2132bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2133bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
21345e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2135668eebc6SAndreas Gruenbacher 		return req;
2136c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2137d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2138c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2139c3afd8f5SAndreas Gruenbacher 	}
2140668eebc6SAndreas Gruenbacher 	return NULL;
2141668eebc6SAndreas Gruenbacher }
2142668eebc6SAndreas Gruenbacher 
2143bde89a9eSAndreas Gruenbacher static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
2144b411b363SPhilipp Reisner {
21459f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2146b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2147b411b363SPhilipp Reisner 	struct drbd_request *req;
2148b411b363SPhilipp Reisner 	sector_t sector;
214982bc0194SAndreas Gruenbacher 	int err;
2150e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21514a76b161SAndreas Gruenbacher 
21529f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21539f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21544a76b161SAndreas Gruenbacher 		return -EIO;
21559f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2156b411b363SPhilipp Reisner 
2157b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2158b411b363SPhilipp Reisner 
21590500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2160b30ab791SAndreas Gruenbacher 	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
21610500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2162c3afd8f5SAndreas Gruenbacher 	if (unlikely(!req))
216382bc0194SAndreas Gruenbacher 		return -EIO;
2164b411b363SPhilipp Reisner 
216524c4830cSBart Van Assche 	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
2166b411b363SPhilipp Reisner 	 * special casing it there for the various failure cases.
2167b411b363SPhilipp Reisner 	 * still no race with drbd_fail_pending_reads */
216869a22773SAndreas Gruenbacher 	err = recv_dless_read(peer_device, req, sector, pi->size);
216982bc0194SAndreas Gruenbacher 	if (!err)
21708554df1cSAndreas Gruenbacher 		req_mod(req, DATA_RECEIVED);
2171b411b363SPhilipp Reisner 	/* else: nothing. handled from drbd_disconnect...
2172b411b363SPhilipp Reisner 	 * I don't think we may complete this just yet
2173b411b363SPhilipp Reisner 	 * in case we are "on-disconnect: freeze" */
2174b411b363SPhilipp Reisner 
217582bc0194SAndreas Gruenbacher 	return err;
2176b411b363SPhilipp Reisner }
2177b411b363SPhilipp Reisner 
2178bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
2179b411b363SPhilipp Reisner {
21809f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2181b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2182b411b363SPhilipp Reisner 	sector_t sector;
218382bc0194SAndreas Gruenbacher 	int err;
2184e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21854a76b161SAndreas Gruenbacher 
21869f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21879f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21884a76b161SAndreas Gruenbacher 		return -EIO;
21899f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2190b411b363SPhilipp Reisner 
2191b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
21920b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, p->block_id == ID_SYNCER);
2193b411b363SPhilipp Reisner 
2194b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
2195b411b363SPhilipp Reisner 		/* data is submitted to disk within recv_resync_read.
2196b411b363SPhilipp Reisner 		 * corresponding put_ldev done below on error,
2197fcefa62eSAndreas Gruenbacher 		 * or in drbd_peer_request_endio. */
2198a0fb3c47SLars Ellenberg 		err = recv_resync_read(peer_device, sector, pi);
2199b411b363SPhilipp Reisner 	} else {
2200b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
2201d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not write resync data to local disk.\n");
2202b411b363SPhilipp Reisner 
220369a22773SAndreas Gruenbacher 		err = drbd_drain_block(peer_device, pi->size);
2204b411b363SPhilipp Reisner 
220569a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2206b411b363SPhilipp Reisner 	}
2207b411b363SPhilipp Reisner 
2208b30ab791SAndreas Gruenbacher 	atomic_add(pi->size >> 9, &device->rs_sect_in);
2209778f271dSPhilipp Reisner 
221082bc0194SAndreas Gruenbacher 	return err;
2211b411b363SPhilipp Reisner }
2212b411b363SPhilipp Reisner 
/* Re-queue postponed local writes overlapping [sector, sector + (size >> 9)),
 * now that the conflicting peer request has completed.
 * Called from e_end_block() with device->resource->req_lock held
 * (required for the write_requests interval-tree walk). */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;	/* a peer request, not a local write */
		req = container_of(i, struct drbd_request, i);
		/* Only restart requests that already completed locally and
		 * lost an earlier write conflict (RQ_POSTPONED). */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
22317be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a mirrored write: send the ack the peer expects
 * (positive, resync, or negative on local I/O error), then take the request
 * out of the conflict-detection interval tree and drop its epoch reference.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While we are a sync target/source, a successful
			 * EE_MAY_SET_IN_SYNC write is acked as P_RS_WRITE_ACK
			 * and the range marked in sync locally as well. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		/* matches inc_unacked() in receive_Data() */
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2277b411b363SPhilipp Reisner 
2278a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2279b411b363SPhilipp Reisner {
22808050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2281a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2282a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
228399920dc5SAndreas Gruenbacher 	int err;
2284b411b363SPhilipp Reisner 
2285a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2286a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2287b411b363SPhilipp Reisner 
228899920dc5SAndreas Gruenbacher 	return err;
2289b411b363SPhilipp Reisner }
2290b411b363SPhilipp Reisner 
/* Ack a conflicting peer write as superseded: it was fully contained in a
 * concurrent overlapping request (see handle_write_conflicts()). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2295b6a370baSPhilipp Reisner 
229699920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22977be8da07SAndreas Gruenbacher {
2298a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2299a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2300a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
23017be8da07SAndreas Gruenbacher 
2302a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2303d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
23047be8da07SAndreas Gruenbacher }
23057be8da07SAndreas Gruenbacher 
23063e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
23073e394da1SAndreas Gruenbacher {
23083e394da1SAndreas Gruenbacher 	/*
23093e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
23103e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
23113e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
23123e394da1SAndreas Gruenbacher 	 */
23133e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
23143e394da1SAndreas Gruenbacher }
23153e394da1SAndreas Gruenbacher 
23163e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
23173e394da1SAndreas Gruenbacher {
23183e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
23193e394da1SAndreas Gruenbacher }
23203e394da1SAndreas Gruenbacher 
/* Record the newest sequence number seen from the peer and wake waiters
 * on device->seq_wait (see wait_for_and_update_peer_seq()).  Sequence
 * numbers only matter when this side resolves write conflicts
 * (RESOLVE_CONFLICTS set on the connection); otherwise this is a no-op. */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
23363e394da1SAndreas Gruenbacher 
2337d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2338d93f6302SLars Ellenberg {
2339d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2340d93f6302SLars Ellenberg }
2341d93f6302SLars Ellenberg 
/* Check whether the given peer write overlaps any resync write still in
 * flight on device->sync_ee.  Used by receive_Data() on a SyncTarget to
 * let the resync write reach the disk first.
 * maybe change sync_ee into interval trees as well? */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	/* req_lock protects the sync_ee list while we walk it */
	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}
2360b6a370baSPhilipp Reisner 
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence numbers only matter when we resolve write conflicts. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* Logically next packet (or older)?  Record the newest
		 * sequence number and proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* sleep until update_peer_seq() wakes us, or time out */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2429b411b363SPhilipp Reisner 
2430688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2431688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2432688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2433bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf)
243476d2e7ecSPhilipp Reisner {
243576d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
243676d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
243728a8f0d3SMike Christie 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2438bb3cc85eSMike Christie }
2439bb3cc85eSMike Christie 
2440bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf)
2441bb3cc85eSMike Christie {
2442f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
244345c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2444f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2445f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2446f31e583aSLars Ellenberg 	if (dpf & DP_WSAME)
2447f31e583aSLars Ellenberg 		return REQ_OP_WRITE_SAME;
2448bb3cc85eSMike Christie 	else
2449bb3cc85eSMike Christie 		return REQ_OP_WRITE;
245076d2e7ecSPhilipp Reisner }
245176d2e7ecSPhilipp Reisner 
/* Negatively complete all postponed local writes overlapping
 * [sector, sector + (size >> 9)).
 * Called with device->resource->req_lock held.  The lock must be dropped
 * around complete_master_bio(); dropping it invalidates the interval-tree
 * iterator, hence the restart via "goto repeat" after every completion. */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;	/* peer request, not ours to fail */
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
24767be8da07SAndreas Gruenbacher 
/* Resolve conflicts between a new peer write and overlapping requests in
 * the write_requests interval tree (two-primaries operation).
 * Called with device->resource->req_lock held.
 * NOTE(review): drbd_wait_misc() presumably releases req_lock while
 * sleeping — confirm; the tree walk is restarted after each wait.
 *
 * Return values:
 *   0       - no remaining conflict; peer_req stays in the tree
 *   -ENOENT - peer_req was superseded or must be retried; the appropriate
 *             ack work has been queued and peer_req removed from the tree
 *   other   - error from drbd_wait_misc(); peer_req removed from the tree
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;	/* skip the request we just inserted */
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* answer the peer from ack_sender context */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25847be8da07SAndreas Gruenbacher 
/* mirrored write */
/*
 * receive_Data() - handle a P_DATA/P_TRIM/P_ZEROES packet, i.e. a write
 * mirrored from the peer: read the payload into a peer request, attach it
 * to the current epoch, send/flag the ack mode negotiated for the wire
 * protocol, resolve write conflicts in two-primaries setups, and submit
 * the request to the local disk.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int op, op_flags;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		int err2;

		/* No usable local disk: keep the peer_seq machinery and the
		 * epoch accounting consistent, drain the payload from the
		 * socket, and answer with a negative ack. */
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	op = wire_flags_to_bio_op(dp_flags);
	op_flags = wire_flags_to_bio_flags(dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* a write without payload must be a stand-alone flush */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the currently open epoch */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		/* Pre-100 peers do not encode the ack mode in dp_flags;
		 * derive it from the configured wire protocol instead. */
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				/* superseded/retry: handle_write_conflicts()
				 * already queued the ack for us */
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and WRITE_SAME are processed synchronously,
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* As a sync target, let any overlapping in-flight resync write
	 * reach the disk before this application write. */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
				       DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2755b411b363SPhilipp Reisner 
27560f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27570f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27580f0601f4SLars Ellenberg  *
27590f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27600f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27610f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27620f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27630f0601f4SLars Ellenberg  *
27640f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27650f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27660f0601f4SLars Ellenberg  */
2767ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2768ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2769e8299874SLars Ellenberg {
2770e8299874SLars Ellenberg 	struct lc_element *tmp;
2771ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2772e8299874SLars Ellenberg 
2773ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2774ad3fee79SLars Ellenberg 		return throttle;
2775e8299874SLars Ellenberg 
2776e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2777e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2778e8299874SLars Ellenberg 	if (tmp) {
2779e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2780e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2781e8299874SLars Ellenberg 			throttle = false;
2782ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2783ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2784e8299874SLars Ellenberg 	}
2785e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2786e8299874SLars Ellenberg 
2787e8299874SLars Ellenberg 	return throttle;
2788e8299874SLars Ellenberg }
2789e8299874SLars Ellenberg 
2790e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27910f0601f4SLars Ellenberg {
2792b30ab791SAndreas Gruenbacher 	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
27930f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2794daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2795e8299874SLars Ellenberg 	int curr_events;
2796daeda1ccSPhilipp Reisner 
2797daeda1ccSPhilipp Reisner 	rcu_read_lock();
2798b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2799daeda1ccSPhilipp Reisner 	rcu_read_unlock();
28000f0601f4SLars Ellenberg 
28010f0601f4SLars Ellenberg 	/* feature disabled? */
2802daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2803e8299874SLars Ellenberg 		return false;
2804e3555d85SPhilipp Reisner 
280559767fbdSMichael Callahan 	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
2806b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2807ad3fee79SLars Ellenberg 
2808ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2809ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
28100f0601f4SLars Ellenberg 		unsigned long rs_left;
28110f0601f4SLars Ellenberg 		int i;
28120f0601f4SLars Ellenberg 
2813b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
28140f0601f4SLars Ellenberg 
28150f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
28160f0601f4SLars Ellenberg 		 * approx. */
2817b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
28182649f080SLars Ellenberg 
2819b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2820b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
28212649f080SLars Ellenberg 		else
2822b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
28230f0601f4SLars Ellenberg 
2824b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
28250f0601f4SLars Ellenberg 		if (!dt)
28260f0601f4SLars Ellenberg 			dt++;
2827b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
28280f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
28290f0601f4SLars Ellenberg 
2830daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2831e8299874SLars Ellenberg 			return true;
28320f0601f4SLars Ellenberg 	}
2833e8299874SLars Ellenberg 	return false;
28340f0601f4SLars Ellenberg }
28350f0601f4SLars Ellenberg 
/* Handle a read-type request from the peer: P_DATA_REQUEST (application
 * read), P_RS_THIN_REQ / P_RS_DATA_REQUEST (resync reads),
 * P_CSUM_RS_REQUEST / P_OV_REPLY (reads to be compared against a received
 * digest), and P_OV_REQUEST (online verify).
 *
 * Validates the requested sector/size, allocates a peer request, queues it
 * on read_ee, optionally throttles the resync, and submits the local read.
 * Returns 0 on success; a negative error (-EIO/-EINVAL/-ENOMEM) triggers
 * re-connect handling in the caller.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;	/* fault-injection class for this IO */
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Sanity: size must be positive, 512-byte aligned, and within the
	 * maximum bio size we agreed to handle. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* The request must not reach beyond the end of the device. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable up-to-date local disk: answer with the appropriate
	 * negative ack per request type, then drain any payload. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_THIN_REQ:
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Pick completion callback and fault-injection type per request. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_THIN_REQ:
		/* If at some point in the future we have a smart way to
		   find out if this data block is completely deallocated,
		   then we would do something smarter here than reading
		   the block... */
		peer_req->flags |= EE_RS_THIN_REQ;
		/* fall through */
	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest_info and the digest itself share one allocation */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		/* receive the digest payload that follows the request header */
		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First P_OV_REQUEST of a verify run (proto >= 90): record the
		 * start position and initialize the progress marks. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	&& drbd_rs_should_slow_down(device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
3048b411b363SPhilipp Reisner 
/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 * @peer_device: peer device; the after-sb-0pri policy is read from its
 *	connection's net_conf.
 *
 * Return: 1 to discard the remote data (we become sync source), -1 to
 * discard the local data (we become sync target), or -100 if no automatic
 * resolution was found (disconnect; see the rule legend above
 * drbd_uuid_compare()).
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;	/* -100: no automatic resolution */
	unsigned long ch_self, ch_peer;	/* changed-block counts of each side */
	enum drbd_after_sb_p after_sb_0p;

	/* Low bit of the bitmap UUID of each node; used by the
	 * younger/older-primary strategies below.  NOTE(review): presumably
	 * this bit encodes which node was primary more recently — confirm
	 * against the UUID-handling code. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* Peer transmits its changed-block count in the UI_SIZE slot. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	/* These policies are only valid with one resp. two primaries;
	 * seeing them here means the configuration is inconsistent. */
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through - to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Nobody changed anything: break the tie via the
			 * RESOLVE_CONFLICTS flag (initiator of the connection). */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* else, fall through */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
3131b411b363SPhilipp Reisner 
313269a22773SAndreas Gruenbacher /**
313369a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
313469a22773SAndreas Gruenbacher  */
313569a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3136b411b363SPhilipp Reisner {
313769a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31386184ea21SAndreas Gruenbacher 	int hg, rv = -100;
313944ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3140b411b363SPhilipp Reisner 
314144ed167dSPhilipp Reisner 	rcu_read_lock();
314269a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
314344ed167dSPhilipp Reisner 	rcu_read_unlock();
314444ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3145b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3146b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3147b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3148b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3149b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
315044ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3151d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3152b411b363SPhilipp Reisner 		break;
3153b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3154b411b363SPhilipp Reisner 		break;
3155b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
315669a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3157b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3158b411b363SPhilipp Reisner 			rv = hg;
3159b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3160b411b363SPhilipp Reisner 			rv = hg;
3161b411b363SPhilipp Reisner 		break;
3162b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
316369a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3164b411b363SPhilipp Reisner 		break;
3165b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3166b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3167b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
316869a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3169b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3170bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3171bb437946SAndreas Gruenbacher 
3172b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3173b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3174b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3175b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3176bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3177b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3178b411b363SPhilipp Reisner 			} else {
3179d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3180b411b363SPhilipp Reisner 				rv = hg;
3181b411b363SPhilipp Reisner 			}
3182b411b363SPhilipp Reisner 		} else
3183b411b363SPhilipp Reisner 			rv = hg;
3184b411b363SPhilipp Reisner 	}
3185b411b363SPhilipp Reisner 
3186b411b363SPhilipp Reisner 	return rv;
3187b411b363SPhilipp Reisner }
3188b411b363SPhilipp Reisner 
318969a22773SAndreas Gruenbacher /**
319069a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
319169a22773SAndreas Gruenbacher  */
319269a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3193b411b363SPhilipp Reisner {
319469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31956184ea21SAndreas Gruenbacher 	int hg, rv = -100;
319644ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3197b411b363SPhilipp Reisner 
319844ed167dSPhilipp Reisner 	rcu_read_lock();
319969a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
320044ed167dSPhilipp Reisner 	rcu_read_unlock();
320144ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3202b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3203b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3204b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3205b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3206b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3207b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3208b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
320944ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3210d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3211b411b363SPhilipp Reisner 		break;
3212b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
321369a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3214b411b363SPhilipp Reisner 		break;
3215b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3216b411b363SPhilipp Reisner 		break;
3217b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
321869a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3219b411b363SPhilipp Reisner 		if (hg == -1) {
3220bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3221bb437946SAndreas Gruenbacher 
3222b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3223b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3224b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3225b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3226bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3227b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3228b411b363SPhilipp Reisner 			} else {
3229d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3230b411b363SPhilipp Reisner 				rv = hg;
3231b411b363SPhilipp Reisner 			}
3232b411b363SPhilipp Reisner 		} else
3233b411b363SPhilipp Reisner 			rv = hg;
3234b411b363SPhilipp Reisner 	}
3235b411b363SPhilipp Reisner 
3236b411b363SPhilipp Reisner 	return rv;
3237b411b363SPhilipp Reisner }
3238b411b363SPhilipp Reisner 
3239b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3240b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3241b411b363SPhilipp Reisner {
3242b411b363SPhilipp Reisner 	if (!uuid) {
3243d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3244b411b363SPhilipp Reisner 		return;
3245b411b363SPhilipp Reisner 	}
3246d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3247b411b363SPhilipp Reisner 	     text,
3248b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3249b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3250b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3251b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3252b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3253b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3254b411b363SPhilipp Reisner }
3255b411b363SPhilipp Reisner 
3256b411b363SPhilipp Reisner /*
3257b411b363SPhilipp Reisner   100	after split brain try auto recover
3258b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3259b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3260b411b363SPhilipp Reisner     0	no Sync
3261b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3262b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3263b411b363SPhilipp Reisner  -100	after split brain, disconnect
3264b411b363SPhilipp Reisner -1000	unrelated data
32654a23f264SPhilipp Reisner -1091   requires proto 91
32664a23f264SPhilipp Reisner -1096   requires proto 96
3267b411b363SPhilipp Reisner  */
3268f2d3d75bSLars Ellenberg 
/*
 * drbd_uuid_compare()  -  arbitrate the sync direction from both UUID sets
 *
 * Compares our on-disk UUID set (device->ldev->md.uuid[]) with the set the
 * peer sent us (device->p_uuid[]) and decides who has the newer data.  The
 * rule that made the decision is reported through *rule_nr; the return
 * value is encoded as documented in the table right above this function.
 * Additionally, values below -0x10000 encode a required peer protocol
 * version and feature flags (see rule 41 below).
 *
 * May fix up either UUID set in place when it detects a missed
 * "resync finished" event or a lost P_SYNC_UUID packet; the caller
 * (drbd_sync_handshake()) invokes us under md.uuid_lock for that reason.
 */
static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* The lowest bit of a UUID is used as a flag; mask it off before
	 * every comparison. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both devices are brand new -- nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are brand new (or have no current UUID) --
	 * full sync from the peer, setting the whole bitmap. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is brand new -- full sync towards it. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* Identical current UUIDs: clean reconnect, or both went
		 * down while they were in sync. */
		int rct, dc; /* roles at crash time */

		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
			/* We still carry a bitmap UUID but the peer does not:
			 * we were sync source and missed the "resync
			 * finished" event. */
			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				/* Retire our stale bitmap UUID into history,
				 * as the completed resync would have done. */
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
			/* Mirror image of the case above: the peer was sync
			 * source and missed the finished event; correct our
			 * copy of its UUID set. */
			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		/* Neither has the "crashed primary" flag set,
		 * only a replication link hiccup. */
		if (rct == 0)
			return 0;

		/* Current UUID equal and no bitmap uuid; does not necessarily
		 * mean this was a "simultaneous hard crash", maybe IO was
		 * frozen, so no UUID-bump happened.
		 * This is a protocol change, overload DRBD_FF_WSAME as flag
		 * for "new-enough" peer DRBD version. */
		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
			*rule_nr = 41;
			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
				/* Encode "peer too old": required protocol
				 * version in the low byte, feature flags in
				 * the next byte. */
				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
			}
			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
				/* At least one has the "crashed primary" bit set,
				 * both are primary now, but neither has rotated its UUIDs?
				 * "Can not happen." */
				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
				return -100;
			}
			/* Exactly one side is primary: that side wins. */
			if (device->state.role == R_PRIMARY)
				return 1;
			return -1;
		}

		/* Both are secondary.
		 * Really looks like recovery from simultaneous hard crash.
		 * Check which had been primary before, and arbitrate. */
		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary at crash time: use the
			 * RESOLVE_CONFLICTS flag as tie breaker. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID equals the peer's bitmap UUID -- the
	 * peer tracks changes against our data generation; we become sync
	 * target using its bitmap. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current UUID equals the peer's first history entry
	 * -- possibly a lost P_SYNC_UUID packet. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID only appears in the peer's history --
	 * the peer moved on without us; full sync from the peer. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rules 70/71/80: mirror images of 50/51/60 with the roles of
	 * self and peer swapped. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: both sides still carry the same non-zero bitmap UUID
	 * -- split brain; attempt automatic recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any common ancestor in the two history lists --
	 * related data, but an unrecoverable split brain. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No common point in either history: unrelated data. */
	return -1000;
}
3483b411b363SPhilipp Reisner 
/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	/* While attaching, the disk state being negotiated is kept in
	 * new_state_tmp; use that instead of the transient D_NEGOTIATING. */
	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* Dump and compare both UUID sets under the uuid_lock, so any
	 * fix-ups drbd_uuid_compare() applies happen atomically. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	/* hg encodes the sync decision (see the table above
	 * drbd_uuid_compare()).  Handle the hard failures first. */
	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* Values below -0x10000 encode "peer too old": required protocol
	 * version in the low byte, feature flags in the next byte. */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* -1091, -1096, ...: minimum required protocol version is -hg-1000. */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side has usable data, the sync direction follows
	 * the disk states, overriding the UUID verdict. */
	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;	/* preserve the "full sync" property */
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	/* Snapshot the net_conf settings we need, under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* Split brain: run the configured after-split-brain auto-recovery
	 * strategy matching the current number of primaries. */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: honor an explicit "discard my data" request
	 * from exactly one side (the peer's flag is bit 0 of
	 * p_uuid[UI_FLAGS]). */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* Becoming sync target while being a consistent primary violates
	 * stable data; act per the configured rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run connects only report what would happen, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| >= 2 means full sync: set every bit in the on-disk bitmap. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	/* Map the decision to the connection state to request next. */
	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3652b411b363SPhilipp Reisner 
3653f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3654b411b363SPhilipp Reisner {
3655b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3656f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3657f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3658b411b363SPhilipp Reisner 
3659b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3660f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3661f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3662b411b363SPhilipp Reisner 
3663b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3664f179d76dSPhilipp Reisner 	return peer;
3665b411b363SPhilipp Reisner }
3666b411b363SPhilipp Reisner 
/*
 * receive_protocol()  -  handle a P_PROTOCOL (or P_PROTOCOL_UPDATE) packet
 *
 * Checks that the peer's wire-protocol settings match our own net_conf
 * (skipped for P_PROTOCOL_UPDATE, where the peer deliberately changes
 * settings), allocates the peer data-integrity digest if one is named,
 * and publishes an updated net_conf.
 *
 * Return: 0 on success; a negative error code on failure.  On the
 * "disconnect" failure paths the connection is additionally moved
 * towards C_DISCONNECTING.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Decode the fixed, big-endian part of the packet. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name follows as
	 * variable-length payload; always force NUL termination. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* For the initial P_PROTOCOL packet, every setting must be
	 * compatible with our local configuration; P_PROTOCOL_UPDATE
	 * means the peer changed its config, so skip these checks. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* The after-sb policies arrive from the peer's point of
		 * view; convert_after_sb() mirrors them into ours. */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* Both sides asking to discard their own data is invalid. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* One digest buffer for received digests, one for our own
		 * verification digests. */
		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Publish the updated net_conf under both connection mutexes,
	 * then hand it to RCU readers via rcu_assign_pointer(). */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new peer-integrity machinery, freeing the old. */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for all RCU readers of the old net_conf before freeing. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* crypto_free_shash() and kfree() both tolerate NULL. */
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3817b411b363SPhilipp Reisner 
3818b411b363SPhilipp Reisner /* helper function
3819b411b363SPhilipp Reisner  * input: alg name, feature name
3820b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3821b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3822b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
38233d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
38243d0e6375SKees Cook 		const struct drbd_device *device,
3825b411b363SPhilipp Reisner 		const char *alg, const char *name)
3826b411b363SPhilipp Reisner {
38273d0e6375SKees Cook 	struct crypto_shash *tfm;
3828b411b363SPhilipp Reisner 
3829b411b363SPhilipp Reisner 	if (!alg[0])
3830b411b363SPhilipp Reisner 		return NULL;
3831b411b363SPhilipp Reisner 
38323d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3833b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3834d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3835b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3836b411b363SPhilipp Reisner 		return tfm;
3837b411b363SPhilipp Reisner 	}
3838b411b363SPhilipp Reisner 	return tfm;
3839b411b363SPhilipp Reisner }
3840b411b363SPhilipp Reisner 
3841bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3842b411b363SPhilipp Reisner {
3843bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
38444a76b161SAndreas Gruenbacher 	int size = pi->size;
38454a76b161SAndreas Gruenbacher 
38464a76b161SAndreas Gruenbacher 	while (size) {
38474a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3848bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38494a76b161SAndreas Gruenbacher 		if (s <= 0) {
38504a76b161SAndreas Gruenbacher 			if (s < 0)
38514a76b161SAndreas Gruenbacher 				return s;
38524a76b161SAndreas Gruenbacher 			break;
38534a76b161SAndreas Gruenbacher 		}
38544a76b161SAndreas Gruenbacher 		size -= s;
38554a76b161SAndreas Gruenbacher 	}
38564a76b161SAndreas Gruenbacher 	if (size)
38574a76b161SAndreas Gruenbacher 		return -EIO;
38584a76b161SAndreas Gruenbacher 	return 0;
38594a76b161SAndreas Gruenbacher }
38604a76b161SAndreas Gruenbacher 
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Drain the packet's payload so the data stream stays in sync. */
	return ignore_remaining_packet(connection, pi);
}
38784a76b161SAndreas Gruenbacher 
3879bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38804a76b161SAndreas Gruenbacher {
38819f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3882b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3883e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3884b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38853d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38863d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38872ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3888813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3889bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3890813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38916a365874SStephen Kitt 	unsigned int fifo_size = 0;
389282bc0194SAndreas Gruenbacher 	int err;
3893b411b363SPhilipp Reisner 
38949f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38959f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3896bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38979f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3898b411b363SPhilipp Reisner 
3899b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3900b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3901b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
39028e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
39038e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3904b411b363SPhilipp Reisner 
3905e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3906d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3907e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
390882bc0194SAndreas Gruenbacher 		return -EIO;
3909b411b363SPhilipp Reisner 	}
3910b411b363SPhilipp Reisner 
3911b411b363SPhilipp Reisner 	if (apv <= 88) {
3912e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3913e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39148e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3915e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3916e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39170b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
39188e26f9ccSPhilipp Reisner 	} else {
3919e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3920e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39210b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3922b411b363SPhilipp Reisner 	}
3923b411b363SPhilipp Reisner 
3924b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3925e658983aSAndreas Gruenbacher 	p = pi->data;
3926b411b363SPhilipp Reisner 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3927b411b363SPhilipp Reisner 
39289f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
392982bc0194SAndreas Gruenbacher 	if (err)
393082bc0194SAndreas Gruenbacher 		return err;
3931b411b363SPhilipp Reisner 
39320500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
39339f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3934b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3935daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3936daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3937b30ab791SAndreas Gruenbacher 			put_ldev(device);
39380500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3939d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3940daeda1ccSPhilipp Reisner 			return -ENOMEM;
3941f399002eSLars Ellenberg 		}
3942b411b363SPhilipp Reisner 
3943b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3944daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3945daeda1ccSPhilipp Reisner 
39466394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3947813472ceSPhilipp Reisner 	}
3948b411b363SPhilipp Reisner 
3949b411b363SPhilipp Reisner 	if (apv >= 88) {
3950b411b363SPhilipp Reisner 		if (apv == 88) {
39515de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3952d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39535de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3954b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3955813472ceSPhilipp Reisner 				err = -EIO;
3956813472ceSPhilipp Reisner 				goto reconnect;
3957b411b363SPhilipp Reisner 			}
3958b411b363SPhilipp Reisner 
39599f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3960813472ceSPhilipp Reisner 			if (err)
3961813472ceSPhilipp Reisner 				goto reconnect;
3962b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3963b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39640b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3965b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3966b411b363SPhilipp Reisner 
3967b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3968b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3969b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39700b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39710b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3972b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3973b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3974b411b363SPhilipp Reisner 		}
3975b411b363SPhilipp Reisner 
39762ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3977b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3978d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39792ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3980b411b363SPhilipp Reisner 				goto disconnect;
3981b411b363SPhilipp Reisner 			}
3982b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3983b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3984b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3985b411b363SPhilipp Reisner 				verify_tfm = NULL;
3986b411b363SPhilipp Reisner 				goto disconnect;
3987b411b363SPhilipp Reisner 			}
3988b411b363SPhilipp Reisner 		}
3989b411b363SPhilipp Reisner 
39902ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3991b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3992d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39932ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3994b411b363SPhilipp Reisner 				goto disconnect;
3995b411b363SPhilipp Reisner 			}
3996b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3997b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3998b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3999b411b363SPhilipp Reisner 				csums_tfm = NULL;
4000b411b363SPhilipp Reisner 				goto disconnect;
4001b411b363SPhilipp Reisner 			}
4002b411b363SPhilipp Reisner 		}
4003b411b363SPhilipp Reisner 
4004813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
4005daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
4006daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
4007daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
4008daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
4009778f271dSPhilipp Reisner 
4010daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
4011b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
4012813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
4013813472ceSPhilipp Reisner 				if (!new_plan) {
4014d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
4015b30ab791SAndreas Gruenbacher 					put_ldev(device);
4016778f271dSPhilipp Reisner 					goto disconnect;
4017778f271dSPhilipp Reisner 				}
4018778f271dSPhilipp Reisner 			}
40198e26f9ccSPhilipp Reisner 		}
4020b411b363SPhilipp Reisner 
402191fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
40222ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
40232ec91e0eSPhilipp Reisner 			if (!new_net_conf) {
4024d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new net_conf failed\n");
402591fd4dadSPhilipp Reisner 				goto disconnect;
402691fd4dadSPhilipp Reisner 			}
402791fd4dadSPhilipp Reisner 
40282ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
402991fd4dadSPhilipp Reisner 
4030b411b363SPhilipp Reisner 			if (verify_tfm) {
40312ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
40322ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
40333d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
40349f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
4035d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
4036b411b363SPhilipp Reisner 			}
4037b411b363SPhilipp Reisner 			if (csums_tfm) {
40382ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
40392ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
40403d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
40419f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
4042d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4043b411b363SPhilipp Reisner 			}
4044bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
4045778f271dSPhilipp Reisner 		}
4046b411b363SPhilipp Reisner 	}
4047b411b363SPhilipp Reisner 
4048813472ceSPhilipp Reisner 	if (new_disk_conf) {
4049b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4050b30ab791SAndreas Gruenbacher 		put_ldev(device);
4051b411b363SPhilipp Reisner 	}
4052813472ceSPhilipp Reisner 
4053813472ceSPhilipp Reisner 	if (new_plan) {
4054b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4055b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4056813472ceSPhilipp Reisner 	}
4057daeda1ccSPhilipp Reisner 
40580500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4059daeda1ccSPhilipp Reisner 	synchronize_rcu();
4060daeda1ccSPhilipp Reisner 	if (new_net_conf)
4061daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4062daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4063813472ceSPhilipp Reisner 	kfree(old_plan);
4064daeda1ccSPhilipp Reisner 
406582bc0194SAndreas Gruenbacher 	return 0;
4066b411b363SPhilipp Reisner 
4067813472ceSPhilipp Reisner reconnect:
4068813472ceSPhilipp Reisner 	if (new_disk_conf) {
4069b30ab791SAndreas Gruenbacher 		put_ldev(device);
4070813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4071813472ceSPhilipp Reisner 	}
40720500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4073813472ceSPhilipp Reisner 	return -EIO;
4074813472ceSPhilipp Reisner 
4075b411b363SPhilipp Reisner disconnect:
4076813472ceSPhilipp Reisner 	kfree(new_plan);
4077813472ceSPhilipp Reisner 	if (new_disk_conf) {
4078b30ab791SAndreas Gruenbacher 		put_ldev(device);
4079813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4080813472ceSPhilipp Reisner 	}
40810500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4082b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4083b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40843d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4085b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40863d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40879f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
408882bc0194SAndreas Gruenbacher 	return -EIO;
4089b411b363SPhilipp Reisner }
4090b411b363SPhilipp Reisner 
4091b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4092b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4093b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4094b411b363SPhilipp Reisner {
4095b411b363SPhilipp Reisner 	sector_t d;
4096b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4097b411b363SPhilipp Reisner 		return;
4098b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4099b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4100d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4101b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4102b411b363SPhilipp Reisner }
4103b411b363SPhilipp Reisner 
/* Process a P_SIZES packet: reconcile our device size with the peer's
 * backing-disk size (d_size), user-requested size (u_size) and currently
 * exposed size (c_size).  May resize the local device, update disk_conf,
 * trigger a resync after online grow, or disconnect on fatal mismatch.
 * Returns 0 on success, -EIO/-ENOMEM on error. */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* Optional queue limits; only sent when the WSAME feature was negotiated. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = drbd_get_capacity(device->this_bdev);

	/* All sizes are in 512-byte sectors, big-endian on the wire. */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			/* Copy-update disk_conf with the new user size and
			 * publish it via RCU under the conf_update mutex. */
			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			/* wait for RCU readers of old_disk_conf before freeing */
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		/* We have a backing disk: let the sizing logic decide. */
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore, peer does not know nothing. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			/* Shrinking under a primary would invalidate in-flight data. */
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* Remember the backing device's size so we can detect local resizes. */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				/* cannot resync now; remember to do it after the next handshake */
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4276b411b363SPhilipp Reisner 
4277bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4278b411b363SPhilipp Reisner {
42799f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4280b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4281e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4282b411b363SPhilipp Reisner 	u64 *p_uuid;
428362b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4284b411b363SPhilipp Reisner 
42859f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42869f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4287bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42889f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42894a76b161SAndreas Gruenbacher 
4290365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
4291063eacf8SJing Wang 	if (!p_uuid) {
4292d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc of p_uuid failed\n");
4293063eacf8SJing Wang 		return false;
4294063eacf8SJing Wang 	}
4295b411b363SPhilipp Reisner 
4296b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4297b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4298b411b363SPhilipp Reisner 
4299b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4300b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4301b411b363SPhilipp Reisner 
4302b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4303b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4304b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4305b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4306d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4307b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
43089f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
430982bc0194SAndreas Gruenbacher 		return -EIO;
4310b411b363SPhilipp Reisner 	}
4311b411b363SPhilipp Reisner 
4312b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4313b411b363SPhilipp Reisner 		int skip_initial_sync =
4314b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
43159f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4316b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4317b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4318b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4319d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4320b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
432120ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
432220ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4323b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4324b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4325b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4326b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4327b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
432862b0da3aSLars Ellenberg 			updated_uuids = 1;
4329b411b363SPhilipp Reisner 		}
4330b30ab791SAndreas Gruenbacher 		put_ldev(device);
4331b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4332b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
433318a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
433418a50fa2SPhilipp Reisner 		   for me. */
4335b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4336b411b363SPhilipp Reisner 	}
4337b411b363SPhilipp Reisner 
4338b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4339b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4340b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4341b411b363SPhilipp Reisner 	   new disk state... */
4342b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4343b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4344b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4345b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
434662b0da3aSLars Ellenberg 
434762b0da3aSLars Ellenberg 	if (updated_uuids)
4348b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4349b411b363SPhilipp Reisner 
435082bc0194SAndreas Gruenbacher 	return 0;
4351b411b363SPhilipp Reisner }
4352b411b363SPhilipp Reisner 
4353b411b363SPhilipp Reisner /**
4354b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4355b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4356b411b363SPhilipp Reisner  */
4357b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4358b411b363SPhilipp Reisner {
4359b411b363SPhilipp Reisner 	union drbd_state ms;
4360b411b363SPhilipp Reisner 
4361b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4362369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4363b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4364b411b363SPhilipp Reisner 
4365b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4366b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4367b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4368b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4369b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4370b411b363SPhilipp Reisner 	};
4371b411b363SPhilipp Reisner 
4372b411b363SPhilipp Reisner 	ms.i = ps.i;
4373b411b363SPhilipp Reisner 
4374b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4375b411b363SPhilipp Reisner 	ms.peer = ps.role;
4376b411b363SPhilipp Reisner 	ms.role = ps.peer;
4377b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4378b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4379b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4380b411b363SPhilipp Reisner 
4381b411b363SPhilipp Reisner 	return ms;
4382b411b363SPhilipp Reisner }
4383b411b363SPhilipp Reisner 
4384bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4385b411b363SPhilipp Reisner {
43869f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4387b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4388e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4389b411b363SPhilipp Reisner 	union drbd_state mask, val;
4390bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4391b411b363SPhilipp Reisner 
43929f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43939f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43944a76b161SAndreas Gruenbacher 		return -EIO;
43959f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43964a76b161SAndreas Gruenbacher 
4397b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4398b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4399b411b363SPhilipp Reisner 
44009f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4401b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
440269a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
440382bc0194SAndreas Gruenbacher 		return 0;
4404b411b363SPhilipp Reisner 	}
4405b411b363SPhilipp Reisner 
4406b411b363SPhilipp Reisner 	mask = convert_state(mask);
4407b411b363SPhilipp Reisner 	val = convert_state(val);
4408b411b363SPhilipp Reisner 
4409b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
441069a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4411047cd4a6SPhilipp Reisner 
4412b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4413b411b363SPhilipp Reisner 
441482bc0194SAndreas Gruenbacher 	return 0;
4415b411b363SPhilipp Reisner }
4416b411b363SPhilipp Reisner 
4417bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4418b411b363SPhilipp Reisner {
4419e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4420dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4421dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4422dfafcc8aSPhilipp Reisner 
4423dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4424dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4425dfafcc8aSPhilipp Reisner 
4426bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4427bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4428bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
442982bc0194SAndreas Gruenbacher 		return 0;
4430dfafcc8aSPhilipp Reisner 	}
4431dfafcc8aSPhilipp Reisner 
4432dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4433dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4434dfafcc8aSPhilipp Reisner 
4435bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4436bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4437dfafcc8aSPhilipp Reisner 
443882bc0194SAndreas Gruenbacher 	return 0;
4439dfafcc8aSPhilipp Reisner }
4440dfafcc8aSPhilipp Reisner 
/* Process a P_STATE packet: reconcile the peer's reported state with our
 * own view and apply the resulting state transition. The logic is highly
 * order dependent; the req_lock-protected read/compare/retry pattern below
 * makes the update atomic with respect to concurrent state changes.
 * Returns 0 on success, -ECONNRESET if the connection is already being
 * torn down, -EIO on fatal disagreement. */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* A peer in D_NEGOTIATING has not decided yet; derive its effective
	 * disk state from the UUID flags it sent earlier.
	 * NOTE(review): assumes bit 2 of p_uuid[UI_FLAGS] means
	 * "peer disk inconsistent" -- set by the peer's uuid packet. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	/* Take a consistent snapshot of our own state; if it changed by the
	 * time we try to commit (see the os.i comparison below), we jump
	 * back here and redo the evaluation. */
	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* Peer throttled itself into Ahead mode; we become the Behind side. */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids().
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	/* Decide whether a resync handshake is needed; only meaningful if
	 * both sides have (or are attaching) a backing disk. */
	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from the handshake means: no usable common ancestor
		 * was found -- fall back to Connected or drop the link. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Commit phase: re-take the lock, verify nothing changed since our
	 * snapshot (otherwise retry), then fold the peer's role/disk state
	 * into ns and apply it. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4637b411b363SPhilipp Reisner 
4638bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4639b411b363SPhilipp Reisner {
46409f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4641b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4642e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
46434a76b161SAndreas Gruenbacher 
46449f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
46459f4fe9adSAndreas Gruenbacher 	if (!peer_device)
46464a76b161SAndreas Gruenbacher 		return -EIO;
46479f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4648b411b363SPhilipp Reisner 
4649b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4650b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4651b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4652b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4653b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4654b411b363SPhilipp Reisner 
46550b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4656b411b363SPhilipp Reisner 
4657b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4658b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4659b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4660b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4661b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4662b411b363SPhilipp Reisner 
4663b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4664b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4665b411b363SPhilipp Reisner 
4666b30ab791SAndreas Gruenbacher 		put_ldev(device);
4667b411b363SPhilipp Reisner 	} else
4668d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4669b411b363SPhilipp Reisner 
467082bc0194SAndreas Gruenbacher 	return 0;
4671b411b363SPhilipp Reisner }
4672b411b363SPhilipp Reisner 
46732c46407dSAndreas Gruenbacher /**
46742c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46752c46407dSAndreas Gruenbacher  *
46762c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46772c46407dSAndreas Gruenbacher  * code upon failure.
46782c46407dSAndreas Gruenbacher  */
46792c46407dSAndreas Gruenbacher static int
468069a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4681e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4682b411b363SPhilipp Reisner {
468350d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
468469a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4685e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
468650d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4687e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46882c46407dSAndreas Gruenbacher 	int err;
4689b411b363SPhilipp Reisner 
469050d0b1adSAndreas Gruenbacher 	if (want != size) {
469169a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46922c46407dSAndreas Gruenbacher 		return -EIO;
4693b411b363SPhilipp Reisner 	}
4694b411b363SPhilipp Reisner 	if (want == 0)
46952c46407dSAndreas Gruenbacher 		return 0;
469669a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
469782bc0194SAndreas Gruenbacher 	if (err)
46982c46407dSAndreas Gruenbacher 		return err;
4699b411b363SPhilipp Reisner 
470069a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4701b411b363SPhilipp Reisner 
4702b411b363SPhilipp Reisner 	c->word_offset += num_words;
4703b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4704b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4705b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4706b411b363SPhilipp Reisner 
47072c46407dSAndreas Gruenbacher 	return 1;
4708b411b363SPhilipp Reisner }
4709b411b363SPhilipp Reisner 
4710a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4711a02d1240SAndreas Gruenbacher {
4712a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4713a02d1240SAndreas Gruenbacher }
4714a02d1240SAndreas Gruenbacher 
4715a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4716a02d1240SAndreas Gruenbacher {
4717a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4718a02d1240SAndreas Gruenbacher }
4719a02d1240SAndreas Gruenbacher 
4720a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4721a02d1240SAndreas Gruenbacher {
4722a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4723a02d1240SAndreas Gruenbacher }
4724a02d1240SAndreas Gruenbacher 
/**
 * recv_bm_rle_bits
 *
 * Decode one packet worth of VLI/RLE compressed bitmap data from @p and
 * set the corresponding bits in the local bitmap. Runs alternate between
 * "clear" and "set"; only set-runs touch the bitmap. The transfer context
 * @c tracks the current bit offset across packets; @len is the payload
 * length in bytes, excluding the p_compressed_bm header.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* sliding window of up to 64 not-yet-consumed bits */
	u64 rl;		/* decoded run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* value of the current run: set or clear */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;
	/* NOTE(review): assumes the sender encodes with the matching
	 * vli_encode_bits scheme -- see drbd_vli.h for the code table. */

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		/* decode the next run length from the front of the window */
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 0 == the whole bitmap has been decoded, 1 == more packets needed */
	return (s != c->bm_bits);
}
4793b411b363SPhilipp Reisner 
47942c46407dSAndreas Gruenbacher /**
47952c46407dSAndreas Gruenbacher  * decode_bitmap_c
47962c46407dSAndreas Gruenbacher  *
47972c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47982c46407dSAndreas Gruenbacher  * code upon failure.
47992c46407dSAndreas Gruenbacher  */
48002c46407dSAndreas Gruenbacher static int
480169a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4802b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4803c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4804c6d25cfeSPhilipp Reisner 		unsigned int len)
4805b411b363SPhilipp Reisner {
4806a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
480769a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4808b411b363SPhilipp Reisner 
4809b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4810b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4811b411b363SPhilipp Reisner 	 * during all our tests. */
4812b411b363SPhilipp Reisner 
481369a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
481469a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
48152c46407dSAndreas Gruenbacher 	return -EIO;
4816b411b363SPhilipp Reisner }
4817b411b363SPhilipp Reisner 
4818b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4819b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4820b411b363SPhilipp Reisner {
4821b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4822a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
482350d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
482450d0b1adSAndreas Gruenbacher 	unsigned int plain =
482550d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
482650d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
482750d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
482850d0b1adSAndreas Gruenbacher 	unsigned int r;
4829b411b363SPhilipp Reisner 
4830b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4831b411b363SPhilipp Reisner 	if (total == 0)
4832b411b363SPhilipp Reisner 		return;
4833b411b363SPhilipp Reisner 
4834b411b363SPhilipp Reisner 	/* don't report if not compressed */
4835b411b363SPhilipp Reisner 	if (total >= plain)
4836b411b363SPhilipp Reisner 		return;
4837b411b363SPhilipp Reisner 
4838b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4839b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4840b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4841b411b363SPhilipp Reisner 
4842b411b363SPhilipp Reisner 	if (r > 1000)
4843b411b363SPhilipp Reisner 		r = 1000;
4844b411b363SPhilipp Reisner 
4845b411b363SPhilipp Reisner 	r = 1000 - r;
4846d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4847b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4848b411b363SPhilipp Reisner 			direction,
4849b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4850b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4851b411b363SPhilipp Reisner 			total, r/10, r % 10);
4852b411b363SPhilipp Reisner }
4853b411b363SPhilipp Reisner 
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* Keep consuming P_BITMAP / P_COMPRESSED_BITMAP packets until the
	 * whole bitmap has been received: the decode helpers return a
	 * positive value while more packets are expected, 0 when done,
	 * negative on error (see the err <= 0 check below). */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			/* a compressed bitmap packet must fit into one receive buffer */
			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* transfer statistics: index 1 counts plain packets,
		 * index 0 compressed ones (see INFO_bm_xfer_stats()) */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* as bitmap target, reply with our own bitmap,
		 * then move on to waiting for the sync UUID */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	/* as bitmap source, start the resync now that the exchange succeeded */
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4949b411b363SPhilipp Reisner 
4950bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4951b411b363SPhilipp Reisner {
49521ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4953e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4954b411b363SPhilipp Reisner 
4955bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4956b411b363SPhilipp Reisner }
4957b411b363SPhilipp Reisner 
4958bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4959b411b363SPhilipp Reisner {
4960b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4961b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4962ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
496382bc0194SAndreas Gruenbacher 	return 0;
4964b411b363SPhilipp Reisner }
4965b411b363SPhilipp Reisner 
4966bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
496773a01a18SPhilipp Reisner {
49689f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4969b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4970e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49714a76b161SAndreas Gruenbacher 
49729f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49739f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49744a76b161SAndreas Gruenbacher 		return -EIO;
49759f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
497673a01a18SPhilipp Reisner 
4977b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4978f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4979f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4980f735e363SLars Ellenberg 	case C_BEHIND:
4981f735e363SLars Ellenberg 			break;
4982f735e363SLars Ellenberg 	default:
4983d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4984b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4985f735e363SLars Ellenberg 	}
4986f735e363SLars Ellenberg 
4987b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
498873a01a18SPhilipp Reisner 
498982bc0194SAndreas Gruenbacher 	return 0;
499073a01a18SPhilipp Reisner }
499173a01a18SPhilipp Reisner 
/* Handle P_RS_DEALLOCATED: instead of sending the data for this resync
 * range, the peer tells us it is deallocated/zeroed.  If we have a local
 * disk, submit a REQ_OP_WRITE_ZEROES peer request for the range; without
 * one (or if submission fails), negatively ack the range instead. */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;
		const int op = REQ_OP_WRITE_ZEROES;

		/* payload size 0: there is no data to receive, only the
		 * range description; EE_TRIM marks it for the submit path */
		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);

		if (err) {
			/* submission failed: unhook and free the request,
			 * then jump into the no-disk path below to send a
			 * negative ack for the range */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
5057700ca8c0SPhilipp Reisner 
/* Dispatch table entry for one data-socket packet type (see drbdd()). */
struct data_cmd {
	int expect_payload;	/* nonzero if payload beyond pkt_size is allowed */
	unsigned int pkt_size;	/* required size of the fixed sub header */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* packet handler */
};
5063b411b363SPhilipp Reisner 
/* One entry per data-socket packet type:
 * { expect_payload, fixed sub header size, handler }.
 * Packet types without an entry (or with a NULL handler) make drbdd()
 * treat the packet as a protocol error. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
};
509502918be2SPhilipp Reisner 
5096bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5097b411b363SPhilipp Reisner {
509877351055SPhilipp Reisner 	struct packet_info pi;
509902918be2SPhilipp Reisner 	size_t shs; /* sub header size */
510082bc0194SAndreas Gruenbacher 	int err;
5101b411b363SPhilipp Reisner 
5102bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
51039104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5104deebe195SAndreas Gruenbacher 
5105bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5106c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5107c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
510802918be2SPhilipp Reisner 			goto err_out;
510902918be2SPhilipp Reisner 
5110deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
51114a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
51121ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
51132fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
511402918be2SPhilipp Reisner 			goto err_out;
51150b33a916SLars Ellenberg 		}
5116b411b363SPhilipp Reisner 
5117e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
51189104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
51199104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5120e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
51211ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
51222fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5123c13f7e1aSLars Ellenberg 			goto err_out;
5124c13f7e1aSLars Ellenberg 		}
51259104d31aSLars Ellenberg 		if (pi.size < shs) {
51269104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
51279104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
51289104d31aSLars Ellenberg 			goto err_out;
51299104d31aSLars Ellenberg 		}
5130c13f7e1aSLars Ellenberg 
5131c13f7e1aSLars Ellenberg 		if (shs) {
5132944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5133bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5134a5c31904SAndreas Gruenbacher 			if (err)
513502918be2SPhilipp Reisner 				goto err_out;
5136e2857216SAndreas Gruenbacher 			pi.size -= shs;
5137b411b363SPhilipp Reisner 		}
513802918be2SPhilipp Reisner 
5139944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5140bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
51414a76b161SAndreas Gruenbacher 		if (err) {
51421ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
51439f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
514402918be2SPhilipp Reisner 			goto err_out;
514502918be2SPhilipp Reisner 		}
514602918be2SPhilipp Reisner 	}
514782bc0194SAndreas Gruenbacher 	return;
514802918be2SPhilipp Reisner 
514902918be2SPhilipp Reisner     err_out:
5150bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5151b411b363SPhilipp Reisner }
5152b411b363SPhilipp Reisner 
/* Tear down a lost connection: stop the ack receiver/sender, close the
 * sockets, run per-volume cleanup (drbd_disconnected()), possibly try to
 * outdate the peer, and move the connection state on towards
 * C_UNCONNECTED (or C_STANDALONE when disconnecting was requested). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* per-volume cleanup; drop the RCU read lock while working on a
	 * device, the extra kref keeps the device alive meanwhile */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* fencing: with a Primary present and the peer's disk state not
	 * known to be good, try to outdate the peer asynchronously */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5209360cc740SPhilipp Reisner 
/* Per-volume part of connection teardown, called from conn_disconnect()
 * for each peer device: wait for in-flight peer requests, cancel resync
 * state, flush the sender workqueue, clear the transfer log (unless I/O
 * is suspended), sync meta data and write out the bitmap.
 * Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the resync timer, then invoke its function once by hand so
	 * a just-cancelled pending expiry still gets its work done */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* forget the peer's UUIDs */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* write out the bitmap while we still hold a local disk reference */
	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
5295b411b363SPhilipp Reisner 
/*
 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
 * we can agree on is stored in agreed_pro_version.
 *
 * The feature flags and the reserved array should be enough room for
 * future enhancements of the handshake protocol, and possible plugins...
 *
 * The feature flags are meanwhile evaluated (see PRO_FEATURES and
 * drbd_do_features()); the reserved array is still expected to be zero,
 * but ignored.
 */
5305bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5306b411b363SPhilipp Reisner {
53079f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
53089f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5309b411b363SPhilipp Reisner 
5310bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5311bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
53129f5bdc33SAndreas Gruenbacher 	if (!p)
5313e8d17b01SAndreas Gruenbacher 		return -EIO;
5314b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5315b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5316b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
531720c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5318bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5319b411b363SPhilipp Reisner }
5320b411b363SPhilipp Reisner 
5321b411b363SPhilipp Reisner /*
5322b411b363SPhilipp Reisner  * return values:
5323b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5324b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5325b411b363SPhilipp Reisner  *  -1 peer talks different language,
5326b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5327b411b363SPhilipp Reisner  */
5328bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection)
5329b411b363SPhilipp Reisner {
5330bde89a9eSAndreas Gruenbacher 	/* ASSERT current == connection->receiver ... */
5331e658983aSAndreas Gruenbacher 	struct p_connection_features *p;
5332e658983aSAndreas Gruenbacher 	const int expect = sizeof(struct p_connection_features);
533377351055SPhilipp Reisner 	struct packet_info pi;
5334a5c31904SAndreas Gruenbacher 	int err;
5335b411b363SPhilipp Reisner 
5336bde89a9eSAndreas Gruenbacher 	err = drbd_send_features(connection);
5337e8d17b01SAndreas Gruenbacher 	if (err)
5338b411b363SPhilipp Reisner 		return 0;
5339b411b363SPhilipp Reisner 
5340bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
534169bc7bc3SAndreas Gruenbacher 	if (err)
5342b411b363SPhilipp Reisner 		return 0;
5343b411b363SPhilipp Reisner 
53446038178eSAndreas Gruenbacher 	if (pi.cmd != P_CONNECTION_FEATURES) {
53451ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
534677351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
5347b411b363SPhilipp Reisner 		return -1;
5348b411b363SPhilipp Reisner 	}
5349b411b363SPhilipp Reisner 
535077351055SPhilipp Reisner 	if (pi.size != expect) {
53511ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
535277351055SPhilipp Reisner 		     expect, pi.size);
5353b411b363SPhilipp Reisner 		return -1;
5354b411b363SPhilipp Reisner 	}
5355b411b363SPhilipp Reisner 
5356e658983aSAndreas Gruenbacher 	p = pi.data;
5357bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, p, expect);
5358a5c31904SAndreas Gruenbacher 	if (err)
5359b411b363SPhilipp Reisner 		return 0;
5360b411b363SPhilipp Reisner 
5361b411b363SPhilipp Reisner 	p->protocol_min = be32_to_cpu(p->protocol_min);
5362b411b363SPhilipp Reisner 	p->protocol_max = be32_to_cpu(p->protocol_max);
5363b411b363SPhilipp Reisner 	if (p->protocol_max == 0)
5364b411b363SPhilipp Reisner 		p->protocol_max = p->protocol_min;
5365b411b363SPhilipp Reisner 
5366b411b363SPhilipp Reisner 	if (PRO_VERSION_MAX < p->protocol_min ||
5367b411b363SPhilipp Reisner 	    PRO_VERSION_MIN > p->protocol_max)
5368b411b363SPhilipp Reisner 		goto incompat;
5369b411b363SPhilipp Reisner 
5370bde89a9eSAndreas Gruenbacher 	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
537120c68fdeSLars Ellenberg 	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
5372b411b363SPhilipp Reisner 
53731ec861ebSAndreas Gruenbacher 	drbd_info(connection, "Handshake successful: "
5374bde89a9eSAndreas Gruenbacher 	     "Agreed network protocol version %d\n", connection->agreed_pro_version);
5375b411b363SPhilipp Reisner 
5376f31e583aSLars Ellenberg 	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
53779104d31aSLars Ellenberg 		  connection->agreed_features,
53789104d31aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
53799104d31aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5380f31e583aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
5381f31e583aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
53829104d31aSLars Ellenberg 		  connection->agreed_features ? "" : " none");
538392d94ae6SPhilipp Reisner 
5384b411b363SPhilipp Reisner 	return 1;
5385b411b363SPhilipp Reisner 
5386b411b363SPhilipp Reisner  incompat:
53871ec861ebSAndreas Gruenbacher 	drbd_err(connection, "incompatible DRBD dialects: "
5388b411b363SPhilipp Reisner 	    "I support %d-%d, peer supports %d-%d\n",
5389b411b363SPhilipp Reisner 	    PRO_VERSION_MIN, PRO_VERSION_MAX,
5390b411b363SPhilipp Reisner 	    p->protocol_min, p->protocol_max);
5391b411b363SPhilipp Reisner 	return -1;
5392b411b363SPhilipp Reisner }
5393b411b363SPhilipp Reisner 
5394b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
5395bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection)
5396b411b363SPhilipp Reisner {
53971ec861ebSAndreas Gruenbacher 	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
53981ec861ebSAndreas Gruenbacher 	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
5399b10d96cbSJohannes Thoma 	return -1;
5400b411b363SPhilipp Reisner }
5401b411b363SPhilipp Reisner #else
5402b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5403b10d96cbSJohannes Thoma 
5404b10d96cbSJohannes Thoma /* Return value:
5405b10d96cbSJohannes Thoma 	1 - auth succeeded,
5406b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5407b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5408b10d96cbSJohannes Thoma */
5409b10d96cbSJohannes Thoma 
5410bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection)
5411b411b363SPhilipp Reisner {
54129f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
5413b411b363SPhilipp Reisner 	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
5414b411b363SPhilipp Reisner 	char *response = NULL;
5415b411b363SPhilipp Reisner 	char *right_response = NULL;
5416b411b363SPhilipp Reisner 	char *peers_ch = NULL;
541744ed167dSPhilipp Reisner 	unsigned int key_len;
541844ed167dSPhilipp Reisner 	char secret[SHARED_SECRET_MAX]; /* 64 byte */
5419b411b363SPhilipp Reisner 	unsigned int resp_size;
542077ce56e2SArnd Bergmann 	struct shash_desc *desc;
542177351055SPhilipp Reisner 	struct packet_info pi;
542244ed167dSPhilipp Reisner 	struct net_conf *nc;
542369bc7bc3SAndreas Gruenbacher 	int err, rv;
5424b411b363SPhilipp Reisner 
54259f5bdc33SAndreas Gruenbacher 	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */
54269f5bdc33SAndreas Gruenbacher 
542744ed167dSPhilipp Reisner 	rcu_read_lock();
5428bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
542944ed167dSPhilipp Reisner 	key_len = strlen(nc->shared_secret);
543044ed167dSPhilipp Reisner 	memcpy(secret, nc->shared_secret, key_len);
543144ed167dSPhilipp Reisner 	rcu_read_unlock();
543244ed167dSPhilipp Reisner 
543377ce56e2SArnd Bergmann 	desc = kmalloc(sizeof(struct shash_desc) +
543477ce56e2SArnd Bergmann 		       crypto_shash_descsize(connection->cram_hmac_tfm),
543577ce56e2SArnd Bergmann 		       GFP_KERNEL);
543677ce56e2SArnd Bergmann 	if (!desc) {
543777ce56e2SArnd Bergmann 		rv = -1;
543877ce56e2SArnd Bergmann 		goto fail;
543977ce56e2SArnd Bergmann 	}
54409534d671SHerbert Xu 	desc->tfm = connection->cram_hmac_tfm;
5441b411b363SPhilipp Reisner 
54429534d671SHerbert Xu 	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
5443b411b363SPhilipp Reisner 	if (rv) {
54449534d671SHerbert Xu 		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
5445b10d96cbSJohannes Thoma 		rv = -1;
5446b411b363SPhilipp Reisner 		goto fail;
5447b411b363SPhilipp Reisner 	}
5448b411b363SPhilipp Reisner 
5449b411b363SPhilipp Reisner 	get_random_bytes(my_challenge, CHALLENGE_LEN);
5450b411b363SPhilipp Reisner 
5451bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5452bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
54539f5bdc33SAndreas Gruenbacher 		rv = 0;
54549f5bdc33SAndreas Gruenbacher 		goto fail;
54559f5bdc33SAndreas Gruenbacher 	}
5456bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
54579f5bdc33SAndreas Gruenbacher 				my_challenge, CHALLENGE_LEN);
5458b411b363SPhilipp Reisner 	if (!rv)
5459b411b363SPhilipp Reisner 		goto fail;
5460b411b363SPhilipp Reisner 
5461bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
546269bc7bc3SAndreas Gruenbacher 	if (err) {
5463b411b363SPhilipp Reisner 		rv = 0;
5464b411b363SPhilipp Reisner 		goto fail;
5465b411b363SPhilipp Reisner 	}
5466b411b363SPhilipp Reisner 
546777351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_CHALLENGE) {
54681ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
546977351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
54709049ccd4SLars Ellenberg 		rv = -1;
5471b411b363SPhilipp Reisner 		goto fail;
5472b411b363SPhilipp Reisner 	}
5473b411b363SPhilipp Reisner 
547477351055SPhilipp Reisner 	if (pi.size > CHALLENGE_LEN * 2) {
54751ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge payload too big.\n");
5476b10d96cbSJohannes Thoma 		rv = -1;
5477b411b363SPhilipp Reisner 		goto fail;
5478b411b363SPhilipp Reisner 	}
5479b411b363SPhilipp Reisner 
548067cca286SPhilipp Reisner 	if (pi.size < CHALLENGE_LEN) {
548167cca286SPhilipp Reisner 		drbd_err(connection, "AuthChallenge payload too small.\n");
548267cca286SPhilipp Reisner 		rv = -1;
548367cca286SPhilipp Reisner 		goto fail;
548467cca286SPhilipp Reisner 	}
548567cca286SPhilipp Reisner 
548677351055SPhilipp Reisner 	peers_ch = kmalloc(pi.size, GFP_NOIO);
5487b411b363SPhilipp Reisner 	if (peers_ch == NULL) {
54881ec861ebSAndreas Gruenbacher 		drbd_err(connection, "kmalloc of peers_ch failed\n");
5489b10d96cbSJohannes Thoma 		rv = -1;
5490b411b363SPhilipp Reisner 		goto fail;
5491b411b363SPhilipp Reisner 	}
5492b411b363SPhilipp Reisner 
5493bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
5494a5c31904SAndreas Gruenbacher 	if (err) {
5495b411b363SPhilipp Reisner 		rv = 0;
5496b411b363SPhilipp Reisner 		goto fail;
5497b411b363SPhilipp Reisner 	}
5498b411b363SPhilipp Reisner 
549967cca286SPhilipp Reisner 	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
550067cca286SPhilipp Reisner 		drbd_err(connection, "Peer presented the same challenge!\n");
550167cca286SPhilipp Reisner 		rv = -1;
550267cca286SPhilipp Reisner 		goto fail;
550367cca286SPhilipp Reisner 	}
550467cca286SPhilipp Reisner 
55059534d671SHerbert Xu 	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
5506b411b363SPhilipp Reisner 	response = kmalloc(resp_size, GFP_NOIO);
5507b411b363SPhilipp Reisner 	if (response == NULL) {
55081ec861ebSAndreas Gruenbacher 		drbd_err(connection, "kmalloc of response failed\n");
5509b10d96cbSJohannes Thoma 		rv = -1;
5510b411b363SPhilipp Reisner 		goto fail;
5511b411b363SPhilipp Reisner 	}
5512b411b363SPhilipp Reisner 
55139534d671SHerbert Xu 	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
5514b411b363SPhilipp Reisner 	if (rv) {
55151ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5516b10d96cbSJohannes Thoma 		rv = -1;
5517b411b363SPhilipp Reisner 		goto fail;
5518b411b363SPhilipp Reisner 	}
5519b411b363SPhilipp Reisner 
5520bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
55219f5bdc33SAndreas Gruenbacher 		rv = 0;
55229f5bdc33SAndreas Gruenbacher 		goto fail;
55239f5bdc33SAndreas Gruenbacher 	}
5524bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
55259f5bdc33SAndreas Gruenbacher 				response, resp_size);
5526b411b363SPhilipp Reisner 	if (!rv)
5527b411b363SPhilipp Reisner 		goto fail;
5528b411b363SPhilipp Reisner 
5529bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
553069bc7bc3SAndreas Gruenbacher 	if (err) {
5531b411b363SPhilipp Reisner 		rv = 0;
5532b411b363SPhilipp Reisner 		goto fail;
5533b411b363SPhilipp Reisner 	}
5534b411b363SPhilipp Reisner 
553577351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_RESPONSE) {
55361ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
553777351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
5538b411b363SPhilipp Reisner 		rv = 0;
5539b411b363SPhilipp Reisner 		goto fail;
5540b411b363SPhilipp Reisner 	}
5541b411b363SPhilipp Reisner 
554277351055SPhilipp Reisner 	if (pi.size != resp_size) {
55431ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
5544b411b363SPhilipp Reisner 		rv = 0;
5545b411b363SPhilipp Reisner 		goto fail;
5546b411b363SPhilipp Reisner 	}
5547b411b363SPhilipp Reisner 
5548bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, response , resp_size);
5549a5c31904SAndreas Gruenbacher 	if (err) {
5550b411b363SPhilipp Reisner 		rv = 0;
5551b411b363SPhilipp Reisner 		goto fail;
5552b411b363SPhilipp Reisner 	}
5553b411b363SPhilipp Reisner 
5554b411b363SPhilipp Reisner 	right_response = kmalloc(resp_size, GFP_NOIO);
55552d1ee87dSJulia Lawall 	if (right_response == NULL) {
55561ec861ebSAndreas Gruenbacher 		drbd_err(connection, "kmalloc of right_response failed\n");
5557b10d96cbSJohannes Thoma 		rv = -1;
5558b411b363SPhilipp Reisner 		goto fail;
5559b411b363SPhilipp Reisner 	}
5560b411b363SPhilipp Reisner 
55619534d671SHerbert Xu 	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
55629534d671SHerbert Xu 				 right_response);
5563b411b363SPhilipp Reisner 	if (rv) {
55641ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5565b10d96cbSJohannes Thoma 		rv = -1;
5566b411b363SPhilipp Reisner 		goto fail;
5567b411b363SPhilipp Reisner 	}
5568b411b363SPhilipp Reisner 
5569b411b363SPhilipp Reisner 	rv = !memcmp(response, right_response, resp_size);
5570b411b363SPhilipp Reisner 
5571b411b363SPhilipp Reisner 	if (rv)
55721ec861ebSAndreas Gruenbacher 		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
557344ed167dSPhilipp Reisner 		     resp_size);
5574b10d96cbSJohannes Thoma 	else
5575b10d96cbSJohannes Thoma 		rv = -1;
5576b411b363SPhilipp Reisner 
5577b411b363SPhilipp Reisner  fail:
5578b411b363SPhilipp Reisner 	kfree(peers_ch);
5579b411b363SPhilipp Reisner 	kfree(response);
5580b411b363SPhilipp Reisner 	kfree(right_response);
558177ce56e2SArnd Bergmann 	if (desc) {
55829534d671SHerbert Xu 		shash_desc_zero(desc);
558377ce56e2SArnd Bergmann 		kfree(desc);
558477ce56e2SArnd Bergmann 	}
5585b411b363SPhilipp Reisner 
5586b411b363SPhilipp Reisner 	return rv;
5587b411b363SPhilipp Reisner }
5588b411b363SPhilipp Reisner #endif
5589b411b363SPhilipp Reisner 
55908fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5591b411b363SPhilipp Reisner {
5592bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5593b411b363SPhilipp Reisner 	int h;
5594b411b363SPhilipp Reisner 
55951ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5596b411b363SPhilipp Reisner 
5597b411b363SPhilipp Reisner 	do {
5598bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5599b411b363SPhilipp Reisner 		if (h == 0) {
5600bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
560120ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5602b411b363SPhilipp Reisner 		}
5603b411b363SPhilipp Reisner 		if (h == -1) {
56041ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5605bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5606b411b363SPhilipp Reisner 		}
5607b411b363SPhilipp Reisner 	} while (h == 0);
5608b411b363SPhilipp Reisner 
5609c51a0ef3SLars Ellenberg 	if (h > 0) {
5610c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5611bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5612c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5613c51a0ef3SLars Ellenberg 	}
5614b411b363SPhilipp Reisner 
5615bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5616b411b363SPhilipp Reisner 
56171ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5618b411b363SPhilipp Reisner 	return 0;
5619b411b363SPhilipp Reisner }
5620b411b363SPhilipp Reisner 
5621b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5622b411b363SPhilipp Reisner 
5623bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5624b411b363SPhilipp Reisner {
5625e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5626b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5627b411b363SPhilipp Reisner 
5628b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5629bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5630b411b363SPhilipp Reisner 	} else {
5631bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
56321ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5633fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5634fc3b10a4SPhilipp Reisner 	}
5635bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5636e4f78edeSPhilipp Reisner 
56372735a594SAndreas Gruenbacher 	return 0;
5638fc3b10a4SPhilipp Reisner }
5639e4f78edeSPhilipp Reisner 
5640bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5641e4f78edeSPhilipp Reisner {
56429f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5643b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5644e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5645e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5646e4f78edeSPhilipp Reisner 
56479f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56489f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56492735a594SAndreas Gruenbacher 		return -EIO;
56509f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56511952e916SAndreas Gruenbacher 
5652bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
56530b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5654bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
56554d0fc3fdSPhilipp Reisner 	}
56564d0fc3fdSPhilipp Reisner 
5657e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5658b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5659e4f78edeSPhilipp Reisner 	} else {
5660b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5661d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5662b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5663b411b363SPhilipp Reisner 	}
5664b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5665b411b363SPhilipp Reisner 
56662735a594SAndreas Gruenbacher 	return 0;
5667b411b363SPhilipp Reisner }
5668b411b363SPhilipp Reisner 
5669bde89a9eSAndreas Gruenbacher static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
5670b411b363SPhilipp Reisner {
5671bde89a9eSAndreas Gruenbacher 	return drbd_send_ping_ack(connection);
5672b411b363SPhilipp Reisner 
5673b411b363SPhilipp Reisner }
5674b411b363SPhilipp Reisner 
5675bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5676b411b363SPhilipp Reisner {
5677b411b363SPhilipp Reisner 	/* restore idle timeout */
5678bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5679bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5680bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5681b411b363SPhilipp Reisner 
56822735a594SAndreas Gruenbacher 	return 0;
5683b411b363SPhilipp Reisner }
5684b411b363SPhilipp Reisner 
5685bde89a9eSAndreas Gruenbacher static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
5686b411b363SPhilipp Reisner {
56879f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5688b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5689e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5690b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5691b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5692b411b363SPhilipp Reisner 
56939f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56949f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56952735a594SAndreas Gruenbacher 		return -EIO;
56969f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56971952e916SAndreas Gruenbacher 
56989f4fe9adSAndreas Gruenbacher 	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
5699b411b363SPhilipp Reisner 
570069a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5701b411b363SPhilipp Reisner 
5702b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
5703b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5704b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5705b411b363SPhilipp Reisner 		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
5706b30ab791SAndreas Gruenbacher 		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5707b30ab791SAndreas Gruenbacher 		put_ldev(device);
57081d53f09eSLars Ellenberg 	}
5709b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5710b30ab791SAndreas Gruenbacher 	atomic_add(blksize >> 9, &device->rs_sect_in);
5711b411b363SPhilipp Reisner 
57122735a594SAndreas Gruenbacher 	return 0;
5713b411b363SPhilipp Reisner }
5714b411b363SPhilipp Reisner 
5715bc9c5c41SAndreas Gruenbacher static int
5716b30ab791SAndreas Gruenbacher validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
5717bc9c5c41SAndreas Gruenbacher 			      struct rb_root *root, const char *func,
5718bc9c5c41SAndreas Gruenbacher 			      enum drbd_req_event what, bool missing_ok)
5719b411b363SPhilipp Reisner {
5720b411b363SPhilipp Reisner 	struct drbd_request *req;
5721b411b363SPhilipp Reisner 	struct bio_and_error m;
5722b411b363SPhilipp Reisner 
57230500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5724b30ab791SAndreas Gruenbacher 	req = find_request(device, root, id, sector, missing_ok, func);
5725b411b363SPhilipp Reisner 	if (unlikely(!req)) {
57260500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
572785997675SAndreas Gruenbacher 		return -EIO;
5728b411b363SPhilipp Reisner 	}
5729b411b363SPhilipp Reisner 	__req_mod(req, what, &m);
57300500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5731b411b363SPhilipp Reisner 
5732b411b363SPhilipp Reisner 	if (m.bio)
5733b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
573485997675SAndreas Gruenbacher 	return 0;
5735b411b363SPhilipp Reisner }
5736b411b363SPhilipp Reisner 
5737bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
5738b411b363SPhilipp Reisner {
57399f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5740b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5741e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5742b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5743b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5744b411b363SPhilipp Reisner 	enum drbd_req_event what;
5745b411b363SPhilipp Reisner 
57469f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57479f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57482735a594SAndreas Gruenbacher 		return -EIO;
57499f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
57501952e916SAndreas Gruenbacher 
575169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5752b411b363SPhilipp Reisner 
5753579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5754b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5755b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
57562735a594SAndreas Gruenbacher 		return 0;
5757b411b363SPhilipp Reisner 	}
5758e05e1e59SAndreas Gruenbacher 	switch (pi->cmd) {
5759b411b363SPhilipp Reisner 	case P_RS_WRITE_ACK:
57608554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER_AND_SIS;
5761b411b363SPhilipp Reisner 		break;
5762b411b363SPhilipp Reisner 	case P_WRITE_ACK:
57638554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER;
5764b411b363SPhilipp Reisner 		break;
5765b411b363SPhilipp Reisner 	case P_RECV_ACK:
57668554df1cSAndreas Gruenbacher 		what = RECV_ACKED_BY_PEER;
5767b411b363SPhilipp Reisner 		break;
5768d4dabbe2SLars Ellenberg 	case P_SUPERSEDED:
5769d4dabbe2SLars Ellenberg 		what = CONFLICT_RESOLVED;
57707be8da07SAndreas Gruenbacher 		break;
57717be8da07SAndreas Gruenbacher 	case P_RETRY_WRITE:
57727be8da07SAndreas Gruenbacher 		what = POSTPONE_WRITE;
5773b411b363SPhilipp Reisner 		break;
5774b411b363SPhilipp Reisner 	default:
57752735a594SAndreas Gruenbacher 		BUG();
5776b411b363SPhilipp Reisner 	}
5777b411b363SPhilipp Reisner 
5778b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5779b30ab791SAndreas Gruenbacher 					     &device->write_requests, __func__,
5780bc9c5c41SAndreas Gruenbacher 					     what, false);
5781b411b363SPhilipp Reisner }
5782b411b363SPhilipp Reisner 
5783bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
5784b411b363SPhilipp Reisner {
57859f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5786b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5787e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5788b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
57892deb8336SPhilipp Reisner 	int size = be32_to_cpu(p->blksize);
579085997675SAndreas Gruenbacher 	int err;
5791b411b363SPhilipp Reisner 
57929f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57939f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57942735a594SAndreas Gruenbacher 		return -EIO;
57959f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5796b411b363SPhilipp Reisner 
579769a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5798b411b363SPhilipp Reisner 
5799579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5800b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
5801b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, size);
58022735a594SAndreas Gruenbacher 		return 0;
5803b411b363SPhilipp Reisner 	}
58042deb8336SPhilipp Reisner 
5805b30ab791SAndreas Gruenbacher 	err = validate_req_change_req_state(device, p->block_id, sector,
5806b30ab791SAndreas Gruenbacher 					    &device->write_requests, __func__,
5807303d1448SPhilipp Reisner 					    NEG_ACKED, true);
580885997675SAndreas Gruenbacher 	if (err) {
58092deb8336SPhilipp Reisner 		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
58102deb8336SPhilipp Reisner 		   The master bio might already be completed, therefore the
5811c3afd8f5SAndreas Gruenbacher 		   request is no longer in the collision hash. */
58122deb8336SPhilipp Reisner 		/* In Protocol B we might already have got a P_RECV_ACK
58132deb8336SPhilipp Reisner 		   but then get a P_NEG_ACK afterwards. */
5814b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, sector, size);
58152deb8336SPhilipp Reisner 	}
58162735a594SAndreas Gruenbacher 	return 0;
5817b411b363SPhilipp Reisner }
5818b411b363SPhilipp Reisner 
5819bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5820b411b363SPhilipp Reisner {
58219f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5822b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5823e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5824b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5825b411b363SPhilipp Reisner 
58269f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58279f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58282735a594SAndreas Gruenbacher 		return -EIO;
58299f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
58301952e916SAndreas Gruenbacher 
583169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
58327be8da07SAndreas Gruenbacher 
5833d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5834b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5835b411b363SPhilipp Reisner 
5836b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5837b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
58388554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5839b411b363SPhilipp Reisner }
5840b411b363SPhilipp Reisner 
5841bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
5842b411b363SPhilipp Reisner {
58439f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5844b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5845b411b363SPhilipp Reisner 	sector_t sector;
5846b411b363SPhilipp Reisner 	int size;
5847e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
58481952e916SAndreas Gruenbacher 
58499f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58509f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58512735a594SAndreas Gruenbacher 		return -EIO;
58529f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5853b411b363SPhilipp Reisner 
5854b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5855b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5856b411b363SPhilipp Reisner 
585769a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5858b411b363SPhilipp Reisner 
5859b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5860b411b363SPhilipp Reisner 
5861b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
5862b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5863e05e1e59SAndreas Gruenbacher 		switch (pi->cmd) {
5864d612d309SPhilipp Reisner 		case P_NEG_RS_DREPLY:
5865b30ab791SAndreas Gruenbacher 			drbd_rs_failed_io(device, sector, size);
5866d612d309SPhilipp Reisner 		case P_RS_CANCEL:
5867d612d309SPhilipp Reisner 			break;
5868d612d309SPhilipp Reisner 		default:
58692735a594SAndreas Gruenbacher 			BUG();
5870d612d309SPhilipp Reisner 		}
5871b30ab791SAndreas Gruenbacher 		put_ldev(device);
5872b411b363SPhilipp Reisner 	}
5873b411b363SPhilipp Reisner 
58742735a594SAndreas Gruenbacher 	return 0;
5875b411b363SPhilipp Reisner }
5876b411b363SPhilipp Reisner 
5877bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
5878b411b363SPhilipp Reisner {
5879e658983aSAndreas Gruenbacher 	struct p_barrier_ack *p = pi->data;
5880c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
58819ed57dcbSLars Ellenberg 	int vnr;
5882b411b363SPhilipp Reisner 
5883bde89a9eSAndreas Gruenbacher 	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
5884b411b363SPhilipp Reisner 
58859ed57dcbSLars Ellenberg 	rcu_read_lock();
5886c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5887c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5888c06ece6bSAndreas Gruenbacher 
5889b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_AHEAD &&
5890b30ab791SAndreas Gruenbacher 		    atomic_read(&device->ap_in_flight) == 0 &&
5891b30ab791SAndreas Gruenbacher 		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5892b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ;
5893b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
5894c4752ef1SPhilipp Reisner 		}
58959ed57dcbSLars Ellenberg 	}
58969ed57dcbSLars Ellenberg 	rcu_read_unlock();
5897c4752ef1SPhilipp Reisner 
58982735a594SAndreas Gruenbacher 	return 0;
5899b411b363SPhilipp Reisner }
5900b411b363SPhilipp Reisner 
5901bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
5902b411b363SPhilipp Reisner {
59039f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5904b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5905e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
590684b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
5907b411b363SPhilipp Reisner 	sector_t sector;
5908b411b363SPhilipp Reisner 	int size;
5909b411b363SPhilipp Reisner 
59109f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
59119f4fe9adSAndreas Gruenbacher 	if (!peer_device)
59122735a594SAndreas Gruenbacher 		return -EIO;
59139f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
59141952e916SAndreas Gruenbacher 
5915b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5916b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5917b411b363SPhilipp Reisner 
591869a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5919b411b363SPhilipp Reisner 
5920b411b363SPhilipp Reisner 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5921b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
5922b411b363SPhilipp Reisner 	else
5923b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
5924b411b363SPhilipp Reisner 
5925b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
59262735a594SAndreas Gruenbacher 		return 0;
59271d53f09eSLars Ellenberg 
5928b30ab791SAndreas Gruenbacher 	drbd_rs_complete_io(device, sector);
5929b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5930b411b363SPhilipp Reisner 
5931b30ab791SAndreas Gruenbacher 	--device->ov_left;
5932ea5442afSLars Ellenberg 
5933ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
5934b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
5935b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
5936ea5442afSLars Ellenberg 
5937b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0) {
593884b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(*dw), GFP_NOIO);
593984b8c06bSAndreas Gruenbacher 		if (dw) {
594084b8c06bSAndreas Gruenbacher 			dw->w.cb = w_ov_finished;
594184b8c06bSAndreas Gruenbacher 			dw->device = device;
594284b8c06bSAndreas Gruenbacher 			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5943b411b363SPhilipp Reisner 		} else {
594484b8c06bSAndreas Gruenbacher 			drbd_err(device, "kmalloc(dw) failed.");
5945b30ab791SAndreas Gruenbacher 			ov_out_of_sync_print(device);
5946b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
5947b411b363SPhilipp Reisner 		}
5948b411b363SPhilipp Reisner 	}
5949b30ab791SAndreas Gruenbacher 	put_ldev(device);
59502735a594SAndreas Gruenbacher 	return 0;
5951b411b363SPhilipp Reisner }
5952b411b363SPhilipp Reisner 
5953bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
59540ced55a3SPhilipp Reisner {
59552735a594SAndreas Gruenbacher 	return 0;
59560ced55a3SPhilipp Reisner }
59570ced55a3SPhilipp Reisner 
5958668700b4SPhilipp Reisner struct meta_sock_cmd {
5959b411b363SPhilipp Reisner 	size_t pkt_size;
5960bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5961b411b363SPhilipp Reisner };
5962b411b363SPhilipp Reisner 
5963668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5964668700b4SPhilipp Reisner {
5965668700b4SPhilipp Reisner 	long t;
5966668700b4SPhilipp Reisner 	struct net_conf *nc;
5967668700b4SPhilipp Reisner 
5968668700b4SPhilipp Reisner 	rcu_read_lock();
5969668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5970668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5971668700b4SPhilipp Reisner 	rcu_read_unlock();
5972668700b4SPhilipp Reisner 
5973668700b4SPhilipp Reisner 	t *= HZ;
5974668700b4SPhilipp Reisner 	if (ping_timeout)
5975668700b4SPhilipp Reisner 		t /= 10;
5976668700b4SPhilipp Reisner 
5977668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5978668700b4SPhilipp Reisner }
5979668700b4SPhilipp Reisner 
5980668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5981668700b4SPhilipp Reisner {
5982668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5983668700b4SPhilipp Reisner }
5984668700b4SPhilipp Reisner 
5985668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5986668700b4SPhilipp Reisner {
5987668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5988668700b4SPhilipp Reisner }
5989668700b4SPhilipp Reisner 
5990668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5991e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5992e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5993b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5994b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5995b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5996d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5997b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5998b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5999b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
6000b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
6001b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
6002b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
6003b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
600402918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
6005d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
60061952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
60071952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
6008b411b363SPhilipp Reisner };
6009b411b363SPhilipp Reisner 
60101c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
6011b411b363SPhilipp Reisner {
6012bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
6013668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
601477351055SPhilipp Reisner 	struct packet_info pi;
6015668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
6016257d0af6SPhilipp Reisner 	int rv;
6017bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
6018b411b363SPhilipp Reisner 	int received = 0;
6019bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
602052b061a4SAndreas Gruenbacher 	int expect   = header_size;
602144ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
6022b411b363SPhilipp Reisner 
60238b700983SPeter Zijlstra 	sched_set_fifo_low(current);
6024b411b363SPhilipp Reisner 
6025e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
602680822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
602744ed167dSPhilipp Reisner 
6028668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
602944ed167dSPhilipp Reisner 
6030bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
6031bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
60321ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
6033841ce241SAndreas Gruenbacher 				goto reconnect;
6034841ce241SAndreas Gruenbacher 			}
6035668700b4SPhilipp Reisner 			set_ping_timeout(connection);
603644ed167dSPhilipp Reisner 			ping_timeout_active = true;
6037b411b363SPhilipp Reisner 		}
6038b411b363SPhilipp Reisner 
6039668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
6040bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
6041b411b363SPhilipp Reisner 
6042b411b363SPhilipp Reisner 		/* Note:
6043b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
6044b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
6045b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
6046b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
6047b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
6048b411b363SPhilipp Reisner 		 * rv == expected: full header or command
6049b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
6050b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
6051b411b363SPhilipp Reisner 		 */
6052b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
6053b411b363SPhilipp Reisner 			received += rv;
6054b411b363SPhilipp Reisner 			buf	 += rv;
6055b411b363SPhilipp Reisner 		} else if (rv == 0) {
6056bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6057b66623e3SPhilipp Reisner 				long t;
6058b66623e3SPhilipp Reisner 				rcu_read_lock();
6059bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6060b66623e3SPhilipp Reisner 				rcu_read_unlock();
6061b66623e3SPhilipp Reisner 
6062bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
6063bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6064b66623e3SPhilipp Reisner 						       t);
6065599377acSPhilipp Reisner 				if (t)
6066599377acSPhilipp Reisner 					break;
6067599377acSPhilipp Reisner 			}
60681ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6069b411b363SPhilipp Reisner 			goto reconnect;
6070b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6071cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6072cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6073668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6074cb6518cbSLars Ellenberg 				continue;
6075f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60761ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6077b411b363SPhilipp Reisner 				goto reconnect;
6078b411b363SPhilipp Reisner 			}
6079bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6080b411b363SPhilipp Reisner 			continue;
6081b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6082668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6083668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6084668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6085668700b4SPhilipp Reisner 			flush_signals(current);
6086b411b363SPhilipp Reisner 			continue;
6087b411b363SPhilipp Reisner 		} else {
60881ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6089b411b363SPhilipp Reisner 			goto reconnect;
6090b411b363SPhilipp Reisner 		}
6091b411b363SPhilipp Reisner 
6092b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6093bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6094b411b363SPhilipp Reisner 				goto reconnect;
6095668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6096668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60971ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60982fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6099b411b363SPhilipp Reisner 				goto disconnect;
6100b411b363SPhilipp Reisner 			}
6101e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
610252b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
61031ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
610477351055SPhilipp Reisner 					pi.cmd, pi.size);
6105b411b363SPhilipp Reisner 				goto reconnect;
6106b411b363SPhilipp Reisner 			}
6107257d0af6SPhilipp Reisner 		}
6108b411b363SPhilipp Reisner 		if (received == expect) {
61092735a594SAndreas Gruenbacher 			bool err;
6110a4fbda8eSPhilipp Reisner 
6111bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
61122735a594SAndreas Gruenbacher 			if (err) {
6113d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6114b411b363SPhilipp Reisner 				goto reconnect;
61151952e916SAndreas Gruenbacher 			}
6116b411b363SPhilipp Reisner 
6117bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6118f36af18cSLars Ellenberg 
6119668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6120668700b4SPhilipp Reisner 				set_idle_timeout(connection);
612144ed167dSPhilipp Reisner 				ping_timeout_active = false;
612244ed167dSPhilipp Reisner 			}
6123b411b363SPhilipp Reisner 
6124bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6125b411b363SPhilipp Reisner 			received = 0;
612652b061a4SAndreas Gruenbacher 			expect	 = header_size;
6127b411b363SPhilipp Reisner 			cmd	 = NULL;
6128b411b363SPhilipp Reisner 		}
6129b411b363SPhilipp Reisner 	}
6130b411b363SPhilipp Reisner 
6131b411b363SPhilipp Reisner 	if (0) {
6132b411b363SPhilipp Reisner reconnect:
6133bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6134bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6135b411b363SPhilipp Reisner 	}
6136b411b363SPhilipp Reisner 	if (0) {
6137b411b363SPhilipp Reisner disconnect:
6138bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6139b411b363SPhilipp Reisner 	}
6140b411b363SPhilipp Reisner 
6141668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6142b411b363SPhilipp Reisner 
6143b411b363SPhilipp Reisner 	return 0;
6144b411b363SPhilipp Reisner }
6145668700b4SPhilipp Reisner 
6146668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6147668700b4SPhilipp Reisner {
6148668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6149668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6150668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6151668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6152668700b4SPhilipp Reisner 	struct net_conf *nc;
6153668700b4SPhilipp Reisner 	int tcp_cork, err;
6154668700b4SPhilipp Reisner 
6155668700b4SPhilipp Reisner 	rcu_read_lock();
6156668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6157668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6158668700b4SPhilipp Reisner 	rcu_read_unlock();
6159668700b4SPhilipp Reisner 
6160668700b4SPhilipp Reisner 	if (tcp_cork)
6161db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, true);
6162668700b4SPhilipp Reisner 
6163668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6164668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6165668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6166668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6167668700b4SPhilipp Reisner 
6168668700b4SPhilipp Reisner 	if (err) {
6169668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6170668700b4SPhilipp Reisner 		return;
6171668700b4SPhilipp Reisner 	}
6172668700b4SPhilipp Reisner 
6173668700b4SPhilipp Reisner 	if (tcp_cork)
6174db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, false);
6175668700b4SPhilipp Reisner 
6176668700b4SPhilipp Reisner 	return;
6177668700b4SPhilipp Reisner }
6178