1*93c68cc4SChristoph Böhmwalder // SPDX-License-Identifier: GPL-2.0-only
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
37b411b363SPhilipp Reisner #include "drbd_int.h"
38a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner #include "drbd_vli.h"
41b411b363SPhilipp Reisner 
42f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4320c68fdeSLars Ellenberg 
4477351055SPhilipp Reisner struct packet_info {
4577351055SPhilipp Reisner 	enum drbd_packet cmd;
46e2857216SAndreas Gruenbacher 	unsigned int size;
47e2857216SAndreas Gruenbacher 	unsigned int vnr;
48e658983aSAndreas Gruenbacher 	void *data;
4977351055SPhilipp Reisner };
5077351055SPhilipp Reisner 
51b411b363SPhilipp Reisner enum finish_epoch {
52b411b363SPhilipp Reisner 	FE_STILL_LIVE,
53b411b363SPhilipp Reisner 	FE_DESTROYED,
54b411b363SPhilipp Reisner 	FE_RECYCLED,
55b411b363SPhilipp Reisner };
56b411b363SPhilipp Reisner 
57bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
58bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5969a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
60a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
61bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6299920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
66b411b363SPhilipp Reisner 
6745bb912bSLars Ellenberg /*
6845bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6945bb912bSLars Ellenberg  * page->private being our "next" pointer.
7045bb912bSLars Ellenberg  */
7145bb912bSLars Ellenberg 
7245bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7345bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
7445bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
7545bb912bSLars Ellenberg  */
7645bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
7745bb912bSLars Ellenberg {
7845bb912bSLars Ellenberg 	struct page *page;
7945bb912bSLars Ellenberg 	struct page *tmp;
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg 	BUG_ON(!n);
8245bb912bSLars Ellenberg 	BUG_ON(!head);
8345bb912bSLars Ellenberg 
8445bb912bSLars Ellenberg 	page = *head;
8523ce4227SPhilipp Reisner 
8623ce4227SPhilipp Reisner 	if (!page)
8723ce4227SPhilipp Reisner 		return NULL;
8823ce4227SPhilipp Reisner 
	/* advance n-1 links: on loop exit "page" is the n-th page of the
	 * chain and "tmp" its successor (the new list head) */
8945bb912bSLars Ellenberg 	while (page) {
9045bb912bSLars Ellenberg 		tmp = page_chain_next(page);
9145bb912bSLars Ellenberg 		if (--n == 0)
9245bb912bSLars Ellenberg 			break; /* found sufficient pages */
9345bb912bSLars Ellenberg 		if (tmp == NULL)
9445bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
9545bb912bSLars Ellenberg 			return NULL;
9645bb912bSLars Ellenberg 		page = tmp;
9745bb912bSLars Ellenberg 	}
9845bb912bSLars Ellenberg 
9945bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10045bb912bSLars Ellenberg 	set_page_private(page, 0);
10145bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
10245bb912bSLars Ellenberg 	page = *head;
10345bb912bSLars Ellenberg 	*head = tmp;
10445bb912bSLars Ellenberg 	return page;
10545bb912bSLars Ellenberg }
10645bb912bSLars Ellenberg 
10745bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10845bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10945bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11045bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11145bb912bSLars Ellenberg {
11245bb912bSLars Ellenberg 	struct page *tmp;
11345bb912bSLars Ellenberg 	int i = 1;
	/* walk to the last page, counting links as we go */
114e8628013SJoe Perches 	while ((tmp = page_chain_next(page))) {
115e8628013SJoe Perches 		++i;
116e8628013SJoe Perches 		page = tmp;
117e8628013SJoe Perches 	}
	/* if requested, report the total chain length through *len */
11845bb912bSLars Ellenberg 	if (len)
11945bb912bSLars Ellenberg 		*len = i;
12045bb912bSLars Ellenberg 	return page;
12145bb912bSLars Ellenberg }
12245bb912bSLars Ellenberg 
/* Give every page of the chain back to the system (put_page).
 * Returns the number of pages freed, for the caller's accounting. */
12345bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12445bb912bSLars Ellenberg {
12545bb912bSLars Ellenberg 	struct page *tmp;
12645bb912bSLars Ellenberg 	int i = 0;
12745bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12845bb912bSLars Ellenberg 		put_page(page);
12945bb912bSLars Ellenberg 		++i;
13045bb912bSLars Ellenberg 	}
13145bb912bSLars Ellenberg 	return i;
13245bb912bSLars Ellenberg }
13345bb912bSLars Ellenberg 
/* Prepend the chain [chain_first .. chain_last] to *head.
 * Caller is responsible for locking (see page_chain_tail comment above). */
13445bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13545bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13645bb912bSLars Ellenberg {
	/* paranoia: verify chain_last really is the tail of chain_first */
13745bb912bSLars Ellenberg #if 1
13845bb912bSLars Ellenberg 	struct page *tmp;
13945bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14045bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14145bb912bSLars Ellenberg #endif
14245bb912bSLars Ellenberg 
14345bb912bSLars Ellenberg 	/* add chain to head */
14445bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14545bb912bSLars Ellenberg 	*head = chain_first;
14645bb912bSLars Ellenberg }
14745bb912bSLars Ellenberg 
/* Try to get @number pages at once: first from the pre-allocated
 * drbd_pp_pool, then page by page via alloc_page(GFP_TRY).
 * Returns a page chain on success, NULL if not all pages were available
 * right now; a partial allocation is parked back in the pool so that the
 * retry loop in drbd_alloc_pages() can pick it up later. */
148b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device,
14918c2d522SAndreas Gruenbacher 				       unsigned int number)
150b411b363SPhilipp Reisner {
151b411b363SPhilipp Reisner 	struct page *page = NULL;
15245bb912bSLars Ellenberg 	struct page *tmp = NULL;
15318c2d522SAndreas Gruenbacher 	unsigned int i = 0;
154b411b363SPhilipp Reisner 
155b411b363SPhilipp Reisner 	/* Yes, testing drbd_pp_vacant outside the lock is racy.
156b411b363SPhilipp Reisner 	 * So what. It saves a spin_lock. */
15745bb912bSLars Ellenberg 	if (drbd_pp_vacant >= number) {
158b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
15945bb912bSLars Ellenberg 		page = page_chain_del(&drbd_pp_pool, number);
16045bb912bSLars Ellenberg 		if (page)
16145bb912bSLars Ellenberg 			drbd_pp_vacant -= number;
162b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
16345bb912bSLars Ellenberg 		if (page)
16445bb912bSLars Ellenberg 			return page;
165b411b363SPhilipp Reisner 	}
16645bb912bSLars Ellenberg 
167b411b363SPhilipp Reisner 	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
168b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
169b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
17045bb912bSLars Ellenberg 	for (i = 0; i < number; i++) {
17145bb912bSLars Ellenberg 		tmp = alloc_page(GFP_TRY);
17245bb912bSLars Ellenberg 		if (!tmp)
17345bb912bSLars Ellenberg 			break;
		/* link the new page in front of the chain built so far */
17445bb912bSLars Ellenberg 		set_page_private(tmp, (unsigned long)page);
17545bb912bSLars Ellenberg 		page = tmp;
17645bb912bSLars Ellenberg 	}
17745bb912bSLars Ellenberg 
17845bb912bSLars Ellenberg 	if (i == number)
179b411b363SPhilipp Reisner 		return page;
18045bb912bSLars Ellenberg 
18145bb912bSLars Ellenberg 	/* Not enough pages immediately available this time.
182c37c8ecfSAndreas Gruenbacher 	 * No need to jump around here, drbd_alloc_pages will retry this
18345bb912bSLars Ellenberg 	 * function "soon". */
18445bb912bSLars Ellenberg 	if (page) {
18545bb912bSLars Ellenberg 		tmp = page_chain_tail(page, NULL);
18645bb912bSLars Ellenberg 		spin_lock(&drbd_pp_lock);
18745bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
18845bb912bSLars Ellenberg 		drbd_pp_vacant += i;
18945bb912bSLars Ellenberg 		spin_unlock(&drbd_pp_lock);
19045bb912bSLars Ellenberg 	}
19145bb912bSLars Ellenberg 	return NULL;
192b411b363SPhilipp Reisner }
193b411b363SPhilipp Reisner 
/* Move net_ee peer requests whose pages are no longer referenced onto
 * @to_be_freed.  Both call sites hold resource->req_lock around this. */
194b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
195a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
196b411b363SPhilipp Reisner {
197a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *tmp;
198b411b363SPhilipp Reisner 
199b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
200b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
201b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
202b411b363SPhilipp Reisner 	   stop to examine the list... */
203b411b363SPhilipp Reisner 
204a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
205045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
206b411b363SPhilipp Reisner 			break;
207a8cd15baSAndreas Gruenbacher 		list_move(&peer_req->w.list, to_be_freed);
208b411b363SPhilipp Reisner 	}
209b411b363SPhilipp Reisner }
210b411b363SPhilipp Reisner 
/* Collect finished net_ee peer requests under req_lock, then free them
 * outside the lock. */
211668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
212b411b363SPhilipp Reisner {
213b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
214db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
215b411b363SPhilipp Reisner 
2160500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
217b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2180500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
219a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
220b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
221b411b363SPhilipp Reisner }
222b411b363SPhilipp Reisner 
/* Run drbd_reclaim_net_peer_reqs() on every volume of @connection that
 * still has pages lent to the network stack.  A kref on the device lets
 * us drop the RCU read lock while reclaiming, and re-take it afterwards
 * to continue the idr walk. */
223668700b4SPhilipp Reisner static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
224668700b4SPhilipp Reisner {
225668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device;
226668700b4SPhilipp Reisner 	int vnr;
227668700b4SPhilipp Reisner 
228668700b4SPhilipp Reisner 	rcu_read_lock();
229668700b4SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
230668700b4SPhilipp Reisner 		struct drbd_device *device = peer_device->device;
231668700b4SPhilipp Reisner 		if (!atomic_read(&device->pp_in_use_by_net))
232668700b4SPhilipp Reisner 			continue;
233668700b4SPhilipp Reisner 
234668700b4SPhilipp Reisner 		kref_get(&device->kref);
235668700b4SPhilipp Reisner 		rcu_read_unlock();
236668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
237668700b4SPhilipp Reisner 		kref_put(&device->kref, drbd_destroy_device);
238668700b4SPhilipp Reisner 		rcu_read_lock();
239668700b4SPhilipp Reisner 	}
240668700b4SPhilipp Reisner 	rcu_read_unlock();
241668700b4SPhilipp Reisner }
242668700b4SPhilipp Reisner 
243b411b363SPhilipp Reisner /**
244c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
2459b48ff07SLee Jones  * @peer_device:	DRBD peer device.
24645bb912bSLars Ellenberg  * @number:		number of pages requested
24745bb912bSLars Ellenberg  * @retry:		whether to retry, if not enough pages are available right now
248b411b363SPhilipp Reisner  *
24945bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2500e49d7b0SLars Ellenberg  * the kernel.
25145bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25245bb912bSLars Ellenberg  *
2530e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2540e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2550e49d7b0SLars Ellenberg  *
2560e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2570e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2580e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2590e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2600e49d7b0SLars Ellenberg  *
26145bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
262b411b363SPhilipp Reisner  */
26369a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
264c37c8ecfSAndreas Gruenbacher 			      bool retry)
265b411b363SPhilipp Reisner {
26669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
267b411b363SPhilipp Reisner 	struct page *page = NULL;
26844ed167dSPhilipp Reisner 	struct net_conf *nc;
269b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
2700e49d7b0SLars Ellenberg 	unsigned int mxb;
271b411b363SPhilipp Reisner 
27244ed167dSPhilipp Reisner 	rcu_read_lock();
27369a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
27444ed167dSPhilipp Reisner 	mxb = nc ? nc->max_buffers : 1000000;
27544ed167dSPhilipp Reisner 	rcu_read_unlock();
27644ed167dSPhilipp Reisner 
277b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->pp_in_use) < mxb)
278b30ab791SAndreas Gruenbacher 		page = __drbd_alloc_pages(device, number);
279b411b363SPhilipp Reisner 
280668700b4SPhilipp Reisner 	/* Try to keep the fast path fast, but occasionally we need
281668700b4SPhilipp Reisner 	 * to reclaim the pages we lent to the network stack. */
282668700b4SPhilipp Reisner 	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
283668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
284668700b4SPhilipp Reisner 
28545bb912bSLars Ellenberg 	while (page == NULL) {
286b411b363SPhilipp Reisner 		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
287b411b363SPhilipp Reisner 
288668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
289b411b363SPhilipp Reisner 
290b30ab791SAndreas Gruenbacher 		if (atomic_read(&device->pp_in_use) < mxb) {
291b30ab791SAndreas Gruenbacher 			page = __drbd_alloc_pages(device, number);
292b411b363SPhilipp Reisner 			if (page)
293b411b363SPhilipp Reisner 				break;
294b411b363SPhilipp Reisner 		}
295b411b363SPhilipp Reisner 
296b411b363SPhilipp Reisner 		if (!retry)
297b411b363SPhilipp Reisner 			break;
298b411b363SPhilipp Reisner 
299b411b363SPhilipp Reisner 		if (signal_pending(current)) {
300d0180171SAndreas Gruenbacher 			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
301b411b363SPhilipp Reisner 			break;
302b411b363SPhilipp Reisner 		}
303b411b363SPhilipp Reisner 
		/* throttle expired without progress: stop honoring max-buffers
		 * (see the "not a hard limit" rationale above) */
3040e49d7b0SLars Ellenberg 		if (schedule_timeout(HZ/10) == 0)
3050e49d7b0SLars Ellenberg 			mxb = UINT_MAX;
306b411b363SPhilipp Reisner 	}
307b411b363SPhilipp Reisner 	finish_wait(&drbd_pp_wait, &wait);
308b411b363SPhilipp Reisner 
30945bb912bSLars Ellenberg 	if (page)
310b30ab791SAndreas Gruenbacher 		atomic_add(number, &device->pp_in_use);
311b411b363SPhilipp Reisner 	return page;
312b411b363SPhilipp Reisner }
313b411b363SPhilipp Reisner 
314c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3150500813fSAndreas Gruenbacher  * Is also used from inside another spin_lock_irq(&resource->req_lock);
31645bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31745bb912bSLars Ellenberg  * or returns all pages to the system. */
318b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
319b411b363SPhilipp Reisner {
320b30ab791SAndreas Gruenbacher 	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
321b411b363SPhilipp Reisner 	int i;
322435f0740SLars Ellenberg 
323a73ff323SLars Ellenberg 	if (page == NULL)
324a73ff323SLars Ellenberg 		return;
325a73ff323SLars Ellenberg 
	/* pool large enough already? then give the pages back to the system,
	 * otherwise re-link the whole chain into drbd_pp_pool */
326183ece30SRoland Kammerer 	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32745bb912bSLars Ellenberg 		i = page_chain_free(page);
32845bb912bSLars Ellenberg 	else {
32945bb912bSLars Ellenberg 		struct page *tmp;
33045bb912bSLars Ellenberg 		tmp = page_chain_tail(page, &i);
331b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
33245bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
33345bb912bSLars Ellenberg 		drbd_pp_vacant += i;
334b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
335b411b363SPhilipp Reisner 	}
336435f0740SLars Ellenberg 	i = atomic_sub_return(i, a);
33745bb912bSLars Ellenberg 	if (i < 0)
338d0180171SAndreas Gruenbacher 		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
339435f0740SLars Ellenberg 			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
340b411b363SPhilipp Reisner 	wake_up(&drbd_pp_wait);
341b411b363SPhilipp Reisner }
342b411b363SPhilipp Reisner 
343b411b363SPhilipp Reisner /*
344b411b363SPhilipp Reisner You need to hold the req_lock:
345b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
346b411b363SPhilipp Reisner 
347b411b363SPhilipp Reisner You must not have the req_lock:
3483967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3490db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3507721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
351b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
352a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
353b411b363SPhilipp Reisner  drbd_clear_done_ee()
354b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
355b411b363SPhilipp Reisner */
356b411b363SPhilipp Reisner 
3579104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size)
3589104d31aSLars Ellenberg  * w_same: payload_size == logical_block_size
3599104d31aSLars Ellenberg  * trim: payload_size == 0 */
/* Allocate a peer request plus the page chain needed to hold
 * @payload_size bytes (zero pages for trim, see comment above).
 * Returns NULL on allocation failure or injected fault. */
360f6ffca9fSAndreas Gruenbacher struct drbd_peer_request *
36169a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
3629104d31aSLars Ellenberg 		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
363b411b363SPhilipp Reisner {
36469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
365db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
366a73ff323SLars Ellenberg 	struct page *page = NULL;
367e6be38a1SCai Huoqing 	unsigned int nr_pages = PFN_UP(payload_size);
368b411b363SPhilipp Reisner 
	/* fault injection point for allocation failures */
369b30ab791SAndreas Gruenbacher 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
370b411b363SPhilipp Reisner 		return NULL;
371b411b363SPhilipp Reisner 
	/* the request struct itself must be kernel-addressable, so mask out
	 * __GFP_HIGHMEM from the caller's gfp_mask */
3720892fac8SKent Overstreet 	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
373db830c46SAndreas Gruenbacher 	if (!peer_req) {
374b411b363SPhilipp Reisner 		if (!(gfp_mask & __GFP_NOWARN))
375d0180171SAndreas Gruenbacher 			drbd_err(device, "%s: allocation failed\n", __func__);
376b411b363SPhilipp Reisner 		return NULL;
377b411b363SPhilipp Reisner 	}
378b411b363SPhilipp Reisner 
3799104d31aSLars Ellenberg 	if (nr_pages) {
380d0164adcSMel Gorman 		page = drbd_alloc_pages(peer_device, nr_pages,
381d0164adcSMel Gorman 					gfpflags_allow_blocking(gfp_mask));
38245bb912bSLars Ellenberg 		if (!page)
38345bb912bSLars Ellenberg 			goto fail;
384a73ff323SLars Ellenberg 	}
385b411b363SPhilipp Reisner 
	/* zero everything, then fill in the fields we care about */
386c5a2c150SLars Ellenberg 	memset(peer_req, 0, sizeof(*peer_req));
387c5a2c150SLars Ellenberg 	INIT_LIST_HEAD(&peer_req->w.list);
388db830c46SAndreas Gruenbacher 	drbd_clear_interval(&peer_req->i);
3899104d31aSLars Ellenberg 	peer_req->i.size = request_size;
390db830c46SAndreas Gruenbacher 	peer_req->i.sector = sector;
391c5a2c150SLars Ellenberg 	peer_req->submit_jif = jiffies;
392a8cd15baSAndreas Gruenbacher 	peer_req->peer_device = peer_device;
393db830c46SAndreas Gruenbacher 	peer_req->pages = page;
3949a8e7753SAndreas Gruenbacher 	/*
3959a8e7753SAndreas Gruenbacher 	 * The block_id is opaque to the receiver.  It is not endianness
3969a8e7753SAndreas Gruenbacher 	 * converted, and sent back to the sender unchanged.
3979a8e7753SAndreas Gruenbacher 	 */
398db830c46SAndreas Gruenbacher 	peer_req->block_id = id;
399b411b363SPhilipp Reisner 
400db830c46SAndreas Gruenbacher 	return peer_req;
401b411b363SPhilipp Reisner 
40245bb912bSLars Ellenberg  fail:
4030892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
404b411b363SPhilipp Reisner 	return NULL;
405b411b363SPhilipp Reisner }
406b411b363SPhilipp Reisner 
/* Release a peer request: digest (if any), its page chain, and finally
 * the request itself.  May sleep (see might_sleep below). */
407b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
408f6ffca9fSAndreas Gruenbacher 		       int is_net)
409b411b363SPhilipp Reisner {
41021ae5d7fSLars Ellenberg 	might_sleep();
411db830c46SAndreas Gruenbacher 	if (peer_req->flags & EE_HAS_DIGEST)
412db830c46SAndreas Gruenbacher 		kfree(peer_req->digest);
413b30ab791SAndreas Gruenbacher 	drbd_free_pages(device, peer_req->pages, is_net);
4140b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
4150b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* EE_CALL_AL_COMPLETE_IO should have been handled before we get
	 * here; if not, complete the activity-log io ourselves */
41621ae5d7fSLars Ellenberg 	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
41721ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
41821ae5d7fSLars Ellenberg 		drbd_al_complete_io(device, &peer_req->i);
41921ae5d7fSLars Ellenberg 	}
4200892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
421b411b363SPhilipp Reisner }
422b411b363SPhilipp Reisner 
/* Splice @list off under req_lock and free every peer request on it.
 * Returns the number of requests freed. */
423b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
424b411b363SPhilipp Reisner {
425b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
426db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
427b411b363SPhilipp Reisner 	int count = 0;
	/* requests on net_ee are accounted in pp_in_use_by_net */
428b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
429b411b363SPhilipp Reisner 
4300500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
431b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4320500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
433b411b363SPhilipp Reisner 
434a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
435b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
436b411b363SPhilipp Reisner 		count++;
437b411b363SPhilipp Reisner 	}
438b411b363SPhilipp Reisner 	return count;
439b411b363SPhilipp Reisner }
440b411b363SPhilipp Reisner 
441b411b363SPhilipp Reisner /*
442a990be46SAndreas Gruenbacher  * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
443b411b363SPhilipp Reisner  */
444b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device)
445b411b363SPhilipp Reisner {
446b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
447b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
448db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
449e2b3032bSAndreas Gruenbacher 	int err = 0;
450b411b363SPhilipp Reisner 
	/* grab both lists under a single lock acquisition */
4510500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
452b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
453b30ab791SAndreas Gruenbacher 	list_splice_init(&device->done_ee, &work_list);
4540500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
455b411b363SPhilipp Reisner 
456a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
457b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
458b411b363SPhilipp Reisner 
459b411b363SPhilipp Reisner 	/* possible callbacks here:
460d4dabbe2SLars Ellenberg 	 * e_end_block, and e_end_resync_block, e_send_superseded.
461b411b363SPhilipp Reisner 	 * all ignore the last argument.
462b411b363SPhilipp Reisner 	 */
463a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
464e2b3032bSAndreas Gruenbacher 		int err2;
465e2b3032bSAndreas Gruenbacher 
466b411b363SPhilipp Reisner 		/* list_del not necessary, next/prev members not touched */
467a8cd15baSAndreas Gruenbacher 		err2 = peer_req->w.cb(&peer_req->w, !!err);
		/* remember the first error, but still run all callbacks */
468e2b3032bSAndreas Gruenbacher 		if (!err)
469e2b3032bSAndreas Gruenbacher 			err = err2;
470b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
471b411b363SPhilipp Reisner 	}
472b30ab791SAndreas Gruenbacher 	wake_up(&device->ee_wait);
473b411b363SPhilipp Reisner 
474e2b3032bSAndreas Gruenbacher 	return err;
475b411b363SPhilipp Reisner }
476b411b363SPhilipp Reisner 
/* Wait until @head is empty.  Caller holds resource->req_lock (see the
 * "You need to hold the req_lock" list above); the lock is dropped while
 * sleeping and re-taken before returning. */
477b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device,
478d4da1537SAndreas Gruenbacher 				     struct list_head *head)
479b411b363SPhilipp Reisner {
480b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
481b411b363SPhilipp Reisner 
482b411b363SPhilipp Reisner 	/* avoids spin_lock/unlock
483b411b363SPhilipp Reisner 	 * and calling prepare_to_wait in the fast path */
484b411b363SPhilipp Reisner 	while (!list_empty(head)) {
485b30ab791SAndreas Gruenbacher 		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		/* must not hold req_lock while sleeping */
4860500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
4877eaceaccSJens Axboe 		io_schedule();
488b30ab791SAndreas Gruenbacher 		finish_wait(&device->ee_wait, &wait);
4890500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
490b411b363SPhilipp Reisner 	}
491b411b363SPhilipp Reisner }
492b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes req_lock itself. */
493b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device,
494d4da1537SAndreas Gruenbacher 				    struct list_head *head)
495b411b363SPhilipp Reisner {
4960500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
497b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, head);
4980500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
499b411b363SPhilipp Reisner }
500b411b363SPhilipp Reisner 
/* Receive up to @size bytes into @buf.  With @flags == 0 this blocks
 * until the full @size arrived (MSG_WAITALL | MSG_NOSIGNAL); otherwise
 * @flags is used as-is.  Returns bytes received or a negative error. */
501dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
502b411b363SPhilipp Reisner {
503b411b363SPhilipp Reisner 	struct kvec iov = {
504b411b363SPhilipp Reisner 		.iov_base = buf,
505b411b363SPhilipp Reisner 		.iov_len = size,
506b411b363SPhilipp Reisner 	};
507b411b363SPhilipp Reisner 	struct msghdr msg = {
508b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
509b411b363SPhilipp Reisner 	};
510aa563d7bSDavid Howells 	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
511f7765c36SAl Viro 	return sock_recvmsg(sock, &msg, msg.msg_flags);
512b411b363SPhilipp Reisner }
513b411b363SPhilipp Reisner 
/* Receive @size bytes from the connection's data socket.  Logs on error
 * or unexpected peer shutdown; a short read forces the connection into
 * C_BROKEN_PIPE.  Returns the drbd_recv_short() result. */
514bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
515b411b363SPhilipp Reisner {
516b411b363SPhilipp Reisner 	int rv;
517b411b363SPhilipp Reisner 
518bde89a9eSAndreas Gruenbacher 	rv = drbd_recv_short(connection->data.socket, buf, size, 0);
519b411b363SPhilipp Reisner 
520b411b363SPhilipp Reisner 	if (rv < 0) {
521b411b363SPhilipp Reisner 		if (rv == -ECONNRESET)
5221ec861ebSAndreas Gruenbacher 			drbd_info(connection, "sock was reset by peer\n");
523b411b363SPhilipp Reisner 		else if (rv != -ERESTARTSYS)
5241ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
525b411b363SPhilipp Reisner 	} else if (rv == 0) {
		/* peer closed the socket: if we initiated the disconnect
		 * ourselves, wait for the state change instead of logging */
526bde89a9eSAndreas Gruenbacher 		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
527b66623e3SPhilipp Reisner 			long t;
528b66623e3SPhilipp Reisner 			rcu_read_lock();
529bde89a9eSAndreas Gruenbacher 			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
530b66623e3SPhilipp Reisner 			rcu_read_unlock();
531b66623e3SPhilipp Reisner 
532bde89a9eSAndreas Gruenbacher 			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
533b66623e3SPhilipp Reisner 
534599377acSPhilipp Reisner 			if (t)
535599377acSPhilipp Reisner 				goto out;
536599377acSPhilipp Reisner 		}
5371ec861ebSAndreas Gruenbacher 		drbd_info(connection, "sock was shut down by peer\n");
538599377acSPhilipp Reisner 	}
539599377acSPhilipp Reisner 
540b411b363SPhilipp Reisner 	if (rv != size)
541bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
542b411b363SPhilipp Reisner 
543599377acSPhilipp Reisner out:
544b411b363SPhilipp Reisner 	return rv;
545b411b363SPhilipp Reisner }
546b411b363SPhilipp Reisner 
/* Like drbd_recv(), but with all-or-nothing semantics: returns 0 on a
 * complete read, a negative error code otherwise (-EIO on short read). */
547bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
548c6967746SAndreas Gruenbacher {
549c6967746SAndreas Gruenbacher 	int err;
550c6967746SAndreas Gruenbacher 
551bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
552c6967746SAndreas Gruenbacher 	if (err != size) {
553c6967746SAndreas Gruenbacher 		if (err >= 0)
554c6967746SAndreas Gruenbacher 			err = -EIO;
555c6967746SAndreas Gruenbacher 	} else
556c6967746SAndreas Gruenbacher 		err = 0;
557c6967746SAndreas Gruenbacher 	return err;
558c6967746SAndreas Gruenbacher }
559c6967746SAndreas Gruenbacher 
/* drbd_recv_all() plus a warning on failure — unless we were interrupted
 * by a signal, in which case the short read is expected. */
560bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
561a5c31904SAndreas Gruenbacher {
562a5c31904SAndreas Gruenbacher 	int err;
563a5c31904SAndreas Gruenbacher 
564bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
565a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5661ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
567a5c31904SAndreas Gruenbacher 	return err;
568a5c31904SAndreas Gruenbacher }
569a5c31904SAndreas Gruenbacher 
5705dbf1673SLars Ellenberg /* quoting tcp(7):
5715dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5725dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5735dbf1673SLars Ellenberg  * This is our wrapper to do so.
5745dbf1673SLars Ellenberg  */
5755dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5765dbf1673SLars Ellenberg 		unsigned int rcv)
5775dbf1673SLars Ellenberg {
5785dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
	/* a size of 0 means "leave the kernel default alone"; the *_LOCK
	 * bits mirror what setsockopt() sets, keeping auto-tuning from
	 * overriding our values */
5795dbf1673SLars Ellenberg 	if (snd) {
5805dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5815dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5825dbf1673SLars Ellenberg 	}
5835dbf1673SLars Ellenberg 	if (rcv) {
5845dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5855dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5865dbf1673SLars Ellenberg 	}
5875dbf1673SLars Ellenberg }
5885dbf1673SLars Ellenberg 
/*
 * drbd_try_connect() - Actively try to establish a TCP connection to the peer.
 *
 * Snapshots the tunables (buffer sizes, connect timeout) from net_conf under
 * RCU, binds to the configured local address with port 0 (kernel picks a free
 * port) and connects to the configured peer address.
 *
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, peer not yet reachable, signal) are silent and keep the current
 * connection state; unexpected errors are logged and force C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Copy the config values we need while holding the RCU read lock;
	 * net_conf may disappear (disconnect) at any time. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* Local copies of the addresses; sockaddr_in6 is large enough to
	 * hold either address family. */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* Both directions time out after the configured connect interval. */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 *  for the outgoing connections.
	 *  This is needed for multihomed hosts and to be
	 *  able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
676b411b363SPhilipp Reisner 
/* Context shared between prepare_listen_socket(), the sk_state_change
 * hook drbd_incoming_connection(), and drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket */
	struct completion door_bell;	/* completed when a peer connects */
	void (*original_sk_state_change)(struct sock *sk);	/* callback we replaced */

};
6847a426fd8SPhilipp Reisner 
685715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk)
686b411b363SPhilipp Reisner {
6877a426fd8SPhilipp Reisner 	struct accept_wait_data *ad = sk->sk_user_data;
688715306f6SAndreas Gruenbacher 	void (*state_change)(struct sock *sk);
6897a426fd8SPhilipp Reisner 
690715306f6SAndreas Gruenbacher 	state_change = ad->original_sk_state_change;
691715306f6SAndreas Gruenbacher 	if (sk->sk_state == TCP_ESTABLISHED)
6927a426fd8SPhilipp Reisner 		complete(&ad->door_bell);
693715306f6SAndreas Gruenbacher 	state_change(sk);
6947a426fd8SPhilipp Reisner }
6957a426fd8SPhilipp Reisner 
/*
 * prepare_listen_socket() - Create, hook and start the listening socket.
 *
 * Creates a TCP socket bound to the configured local address, replaces its
 * sk_state_change callback with drbd_incoming_connection() (so an incoming
 * connection rings ad->door_bell), and puts it into the listening state.
 * On success the socket is stored in ad->s_listen for later accept().
 *
 * Returns 0 on success, -EIO on any failure.  Unexpected errors (anything
 * but -EAGAIN/-EINTR/-ERESTARTSYS) are logged and force C_DISCONNECTING.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot the buffer sizes under RCU; net_conf may go away. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Install our state-change hook under the callback lock, so it is
	 * atomic with respect to concurrent socket state transitions. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7581f3e509bSPhilipp Reisner 
/*
 * unregister_state_change() - Undo the sk_state_change hook installed by
 * prepare_listen_socket().
 *
 * Restores the original callback and clears sk_user_data under the socket
 * callback lock, so drbd_incoming_connection() cannot run against a stale
 * accept_wait_data afterwards.
 */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
766715306f6SAndreas Gruenbacher 
/*
 * drbd_wait_for_connect() - Wait (bounded) for the peer to connect to us.
 *
 * Waits up to roughly connect_int seconds (with +/- 1/7 random jitter, so
 * both peers don't retry in lock step) for drbd_incoming_connection() to
 * ring the door bell, then accepts the pending connection.
 *
 * Returns the accepted socket with its state-change hook already removed,
 * or NULL on timeout/interruption/error.  Unexpected accept errors are
 * logged and force C_DISCONNECTING.
 */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
803b411b363SPhilipp Reisner 
804bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
805b411b363SPhilipp Reisner 
806bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8079f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8089f5bdc33SAndreas Gruenbacher {
809bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8109f5bdc33SAndreas Gruenbacher 		return -EIO;
811bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
/*
 * receive_first_packet() - Read and decode the peer's initial packet.
 *
 * Sets the receive timeout to 4x the configured ping timeout before the
 * blocking read, so a silent peer cannot stall the handshake forever.
 *
 * Returns the decoded packet command (expected: P_INITIAL_DATA or
 * P_INITIAL_META) on success, or a negative error code on a short read,
 * a decode failure, or if net_conf vanished underneath us.
 */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		/* map a short (non-negative) read to -EIO */
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
841b411b363SPhilipp Reisner 
842b411b363SPhilipp Reisner /**
843b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
844b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
845b411b363SPhilipp Reisner  */
8465d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
847b411b363SPhilipp Reisner {
848b411b363SPhilipp Reisner 	int rr;
849b411b363SPhilipp Reisner 	char tb[4];
850b411b363SPhilipp Reisner 
851b411b363SPhilipp Reisner 	if (!*sock)
85281e84650SAndreas Gruenbacher 		return false;
853b411b363SPhilipp Reisner 
854dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
855b411b363SPhilipp Reisner 
856b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85781e84650SAndreas Gruenbacher 		return true;
858b411b363SPhilipp Reisner 	} else {
859b411b363SPhilipp Reisner 		sock_release(*sock);
860b411b363SPhilipp Reisner 		*sock = NULL;
86181e84650SAndreas Gruenbacher 		return false;
862b411b363SPhilipp Reisner 	}
863b411b363SPhilipp Reisner }
8645d0b17f1SPhilipp Reisner 
8655d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8665d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8675d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8685d0b17f1SPhilipp Reisner {
8695d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8705d0b17f1SPhilipp Reisner 	int timeout;
8715d0b17f1SPhilipp Reisner 	bool ok;
8725d0b17f1SPhilipp Reisner 
8735d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8745d0b17f1SPhilipp Reisner 		return false;
8755d0b17f1SPhilipp Reisner 
8765d0b17f1SPhilipp Reisner 	rcu_read_lock();
8775d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8785d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8795d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8805d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8815d0b17f1SPhilipp Reisner 
8825d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8835d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8845d0b17f1SPhilipp Reisner 
8855d0b17f1SPhilipp Reisner 	return ok;
8865d0b17f1SPhilipp Reisner }
8875d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* Fresh connection: restart the packet sequence numbering. */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Before protocol 100 there is only one volume per connection, so
	 * state changes serialize on the connection-wide mutex; from 100 on
	 * each device has its own. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* Initial exchange: sync parameters, sizes, uuids, current state.
	 * Stop at the first error; err carries the first failure. */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
915b411b363SPhilipp Reisner 
/*
 * conn_connect() - Establish both DRBD sockets (data + meta) and perform
 * the feature/auth/protocol handshake.
 *
 * Both peers simultaneously try to connect and listen; whichever socket
 * pair comes up first wins, crossed attempts are resolved with a coin
 * flip (see "randomize" below).
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* sock = data socket, msock = meta socket; both start unconnected. */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* First the active attempt: whichever of the two sockets is
		 * still missing gets this connection, announced with the
		 * corresponding P_INITIAL_* packet. */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* Then the passive side: accept whatever the peer initiated
		 * and classify it by its first packet. */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* Both sides connected at once; keep the
					 * accepted one, coin-flip whether to retry. */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32_max(2))
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* Both sockets are up; the listen socket is no longer needed. */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	/* GFP_NOIO: socket allocations must not recurse into block I/O. */
	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* Handshake done: switch the data socket to the operational timeouts. */
	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* Per-volume setup.  The kref keeps the device alive across the
	 * RCU unlock/relock needed because drbd_connected() may sleep. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1155b411b363SPhilipp Reisner 
/* Parse the raw on-the-wire packet header in @header into @pi.
 *
 * Three header layouts exist, distinguished by size and magic:
 * protocol 100+ (p_header100: 32bit magic, carries a volume number),
 * the "big" 95 header (p_header95: 16bit magic, 32bit length), and
 * the original header (p_header80: 32bit magic, 16bit length).
 * Which layout is in use is fixed by the agreed protocol version, so
 * we match on drbd_header_size() first, then verify the magic.
 *
 * On success pi->data points just past the header, into the same
 * receive buffer.  Returns 0, or -EINVAL on a malformed header.
 */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		/* pre-100 headers carry no volume number */
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}
1191b411b363SPhilipp Reisner 
1192c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1193c51a0ef3SLars Ellenberg {
1194c51a0ef3SLars Ellenberg 	if (current->plug == &connection->receiver_plug) {
1195c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
1196c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
1197c51a0ef3SLars Ellenberg 	} /* else: maybe just schedule() ?? */
1198c51a0ef3SLars Ellenberg }
1199c51a0ef3SLars Ellenberg 
1200bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1201257d0af6SPhilipp Reisner {
1202bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120369bc7bc3SAndreas Gruenbacher 	int err;
1204257d0af6SPhilipp Reisner 
1205bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1206a5c31904SAndreas Gruenbacher 	if (err)
120769bc7bc3SAndreas Gruenbacher 		return err;
1208257d0af6SPhilipp Reisner 
1209bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1210bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1211b411b363SPhilipp Reisner 
121269bc7bc3SAndreas Gruenbacher 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first try a non-blocking receive.
 *
 * If the full header is not immediately available, use the idle moment
 * to reduce latency: tell the peer's TCP what we have so far (quickack)
 * and unplug our backend queues, then receive the remainder blockingly.
 */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* Partial read: account for what we already have, and
		 * fetch the rest with the blocking variant. */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;		/* flushes still in flight (+1 held by the submitter) */
	int error;			/* errno of a failed flush, 0 if all succeeded */
	struct completion done;		/* completed when pending drops to zero */
};
/* Per-bio private data: links one flush back to its shared context. */
struct one_flush_context {
	struct drbd_device *device;	/* device this single flush was issued to */
	struct issue_flush_context *ctx; /* shared context of the whole batch */
};
1258f9ff0da5SLars Ellenberg 
/* Completion handler for one per-device flush bio (see submit_one_flush()).
 * Records any error in the shared batch context, drops the references
 * taken at submit time, and completes the batch when the last flush
 * finishes. */
static void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_status) {
		/* only the last error "wins"; any nonzero value triggers
		 * the fallback to WO_DRAIN_IO in drbd_flush() */
		ctx->error = blk_status_to_errno(bio->bi_status);
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	kref_put(&device->kref, drbd_destroy_device);

	/* last completion (including the submitter's +1 already dropped)
	 * wakes up the waiter in drbd_flush() */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1279f9ff0da5SLars Ellenberg 
/* Issue one asynchronous empty flush bio to @device's backing device,
 * accounted in @ctx.  The caller transfers a local-disk reference and a
 * kref; both are dropped either here (allocation failure) or in
 * one_flush_endio() on completion. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
				    REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);

	if (!octx) {
		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	/* one more flush in flight; paired with the dec in one_flush_endio() */
	atomic_inc(&ctx->pending);
	submit_bio(bio);
}
1309f9ff0da5SLars Ellenberg 
/* Flush the backing devices of all volumes of @connection in parallel
 * and wait for all completions.  Only active when the resource's write
 * ordering is at least WO_BDEV_FLUSH; on any flush error, degrade the
 * write ordering to WO_DRAIN_IO so we stop issuing flushes. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* start pending at 1: the dec below pairs with it, so the
		 * batch cannot complete while we are still submitting */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* submit_one_flush() allocates with GFP_NOIO and may
			 * sleep; drop the rcu read lock around it.  The kref
			 * and ldev references keep the device valid. */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1350b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch can be finished (barrier ack sent; epoch destroyed or recycled)
 * once it contains at least one request, no request is still active, and
 * its barrier number has arrived -- or when we are cleaning up (EV_CLEANUP).
 * Finishing one epoch may enable the next one in the list to finish too,
 * hence the loop.
 *
 * Returns FE_STILL_LIVE, FE_DESTROYED or FE_RECYCLED.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier flag */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drbd_send_b_ack() may sleep; drop the
				 * spinlock around the network send */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* not the newest epoch: free it, and retry
				 * with its successor on the next iteration */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* newest epoch: reset and reuse it */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1426b411b363SPhilipp Reisner 
14278fe39aacSPhilipp Reisner static enum write_ordering_e
14288fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14298fe39aacSPhilipp Reisner {
14308fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14318fe39aacSPhilipp Reisner 
14328fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14338fe39aacSPhilipp Reisner 
1434f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1435f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14388fe39aacSPhilipp Reisner 
14398fe39aacSPhilipp Reisner 	return wo;
14408fe39aacSPhilipp Reisner }
14418fe39aacSPhilipp Reisner 
/*
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource:	DRBD resource.
 * @bdev:	a backing device to consider in addition to those already
 *		attached to @resource (may be NULL; used during attach,
 *		before the device shows up in the resource's idr).
 * @wo:		Write ordering method to try.
 *
 * The result is the minimum of the current method, the requested one,
 * and whatever each backing device's configuration allows -- except that
 * an explicit WO_BDEV_FLUSH request is allowed to upgrade again.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already covered by the loop: no need to
			 * consider it separately below */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1480b411b363SPhilipp Reisner 
/*
 * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
 * will directly go to fallback mode, submitting normal writes, and
 * never even try to UNMAP.
 *
 * And dm-thin does not do this (yet), mostly because in general it has
 * to assume that "skip_block_zeroing" is set.  See also:
 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
 *
 * We *may* ignore the discard-zeroes-data setting, if so configured.
 *
 * Assumption is that this "discard_zeroes_data=0" is only because the backend
 * may ignore partial unaligned discards.
 *
 * LVM/DM thin as of at least
 *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
 *   Library version: 1.02.93-RHEL7 (2015-01-28)
 *   Driver version:  4.29.0
 * still behaves this way.
 *
 * For unaligned (wrt. alignment and granularity) or too small discards,
 * we zero-out the initial (and/or) trailing unaligned partial chunks,
 * but discard all the aligned full chunks.
 *
 * At least for LVM/DM thin, with skip_block_zeroing=false,
 * the result is effectively "discard_zeroes_data=1".
 */
/* flags: EE_TRIM|EE_ZEROOUT */
/* Discard or zero-out @nr_sectors starting at @start on @device's backing
 * device, synchronously.  Returns 0 on success, 1 if any sub-operation
 * reported an error. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* EE_ZEROOUT forces, and absence of EE_TRIM implies, plain zero-out */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap single discards at 2^22 sectors, rounded down to granularity */
	max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		/* unaligned head: zero-out up to the first aligned boundary */
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
					    GFP_NOIO);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* zero-out whatever remains (unaligned tail, or everything if we
	 * never discarded) */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1575f31e583aSLars Ellenberg 
1576f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1577f31e583aSLars Ellenberg {
1578f31e583aSLars Ellenberg 	struct disk_conf *dc;
1579f31e583aSLars Ellenberg 	bool can_do;
1580f31e583aSLars Ellenberg 
158170200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
1582f31e583aSLars Ellenberg 		return false;
1583f31e583aSLars Ellenberg 
1584f31e583aSLars Ellenberg 	rcu_read_lock();
1585f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1586f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1587f31e583aSLars Ellenberg 	rcu_read_unlock();
1588f31e583aSLars Ellenberg 	return can_do;
1589f31e583aSLars Ellenberg }
1590f31e583aSLars Ellenberg 
/* Execute a peer discard/zero-out request synchronously, then complete it
 * via drbd_endio_write_sec_final().  Sets EE_WAS_ERROR on failure. */
static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	/* If the backend cannot discard, or does not guarantee
	 * read-back zeroes in discarded ranges, we fall back to
	 * zero-out.  Unless configuration specifically requested
	 * otherwise. */
	if (!can_do_reliable_discards(device))
		peer_req->flags |= EE_ZEROOUT;

	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}
1605dd4f699dSLars Ellenberg 
1606ce668b6dSChristoph Böhmwalder static int peer_request_fault_type(struct drbd_peer_request *peer_req)
1607ce668b6dSChristoph Böhmwalder {
1608ce668b6dSChristoph Böhmwalder 	if (peer_req_op(peer_req) == REQ_OP_READ) {
1609ce668b6dSChristoph Böhmwalder 		return peer_req->flags & EE_APPLICATION ?
1610ce668b6dSChristoph Böhmwalder 			DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
1611ce668b6dSChristoph Böhmwalder 	} else {
1612ce668b6dSChristoph Böhmwalder 		return peer_req->flags & EE_APPLICATION ?
1613ce668b6dSChristoph Böhmwalder 			DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
1614ce668b6dSChristoph Böhmwalder 	}
1615ce668b6dSChristoph Böhmwalder }
1616ce668b6dSChristoph Böhmwalder 
/**
 * drbd_submit_peer_request()
 * @peer_req:	peer request
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
{
	struct drbd_device *device = peer_req->peer_device->device;
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned int data_size = peer_req->i.size;
	unsigned int n_bios = 0;
	unsigned int nr_pages = PFN_UP(data_size);

	/* TRIM/DISCARD: for now, always use the helper function
	 * blkdev_issue_zeroout(..., discard=true).
	 * It's synchronous, but it does the right thing wrt. bio splitting.
	 * Correctness first, performance later.  Next step is to code an
	 * asynchronous variant of the same.
	 */
	if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request from receive_rs_deallocated(),
		 * it is already on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		drbd_issue_peer_discard_or_zero_out(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	/* _DISCARD, _WRITE_ZEROES handled above.
	 * REQ_OP_FLUSH (empty flush) not expected,
	 * should have been mapped to a "drbd protocol barrier".
	 * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
	 */
	if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
				peer_req_op(peer_req) == REQ_OP_READ)) {
		drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
		return -EINVAL;
	}

	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* chain the new bio onto the (single linked) list of bios to submit */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	/* fill the bio from the peer request's page chain; on the first page
	 * that does not fit, start over with a fresh bio for the remainder */
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0))
			goto next_bio;
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	/* submit the chained bios, oldest last (list is LIFO) */
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
	} while (bios);
	return 0;
}
172445bb912bSLars Ellenberg 
/* Remove a peer request's interval from the device's write_requests tree
 * and mark the interval empty.  Callers in this file (see e_end_block())
 * hold device->resource->req_lock while calling this. */
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete.  */
	if (i->waiting)
		wake_up(&device->misc_wait);
}
173753840641SAndreas Gruenbacher 
/* Wait until the active_ee list of every volume (peer device) on this
 * connection has drained.
 *
 * The peer_devices idr is walked under rcu_read_lock(), but
 * drbd_wait_ee_list_empty() sleeps; so for each device we take a kref,
 * drop the RCU read lock for the duration of the wait, and re-acquire
 * it before continuing the iteration.  The kref keeps the device alive
 * across the unlocked window. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
175577fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch and, depending on the configured write ordering, either
 * finish the epoch right away or start a new one.
 * Returns 0 on success, -EIO on an unknown write_ordering setting. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* allocation failed: degrade to drain/flush behavior below */
		fallthrough;

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* Wait for all pending writes, then flush the backing devices,
		 * before acknowledging the barrier. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* We got here with a freshly allocated epoch; initialize it and try
	 * to install it as the new current epoch. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1823b411b363SPhilipp Reisner 
18249104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
18253d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18269104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18279104d31aSLars Ellenberg 			      unsigned int payload_size)
18289104d31aSLars Ellenberg {
18299104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18309104d31aSLars Ellenberg 	r->i.size = payload_size;
18319104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18329104d31aSLars Ellenberg 	r->i.size = tmp;
18339104d31aSLars Ellenberg }
18349104d31aSLars Ellenberg 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * data_size: actual payload ("data in")
 * 	for normal writes that is bi_size.
 * 	for discards, that is zero.
 * 	for write same, it is logical_block_size.
 * both trim and write same have the bi_size ("data len to be affected")
 * as extra argument in the packet header.
 *
 * Allocates a peer request, receives (and optionally digest-verifies)
 * the payload from the socket into its page chain.
 * Returns the new peer request, or NULL on any receive/validation
 * failure (the request is freed again in that case). */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = get_capacity(device->vdisk);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;

	/* If data integrity checking is enabled, the digest precedes the
	 * payload on the wire; receive it first. */
	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim. */
	ds = data_size;
	if (trim) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (zeroes) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(zeroes->size);
	}

	/* sanity check the affected size */
	if (!expect(IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || zeroes) {
		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	/* trim and zero-out carry no payload; we are done after flagging. */
	if (trim) {
		peer_req->flags |= EE_TRIM;
		return peer_req;
	}
	if (zeroes) {
		peer_req->flags |= EE_ZEROOUT;
		return peer_req;
	}

	/* receive payload size bytes into page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		/* optional fault injection: flip the first word of the page */
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the digest sent by the peer */
	if (digest_size) {
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1951b411b363SPhilipp Reisner 
1952b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1953b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1954b411b363SPhilipp Reisner  */
195569a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1956b411b363SPhilipp Reisner {
1957b411b363SPhilipp Reisner 	struct page *page;
1958a5c31904SAndreas Gruenbacher 	int err = 0;
1959b411b363SPhilipp Reisner 	void *data;
1960b411b363SPhilipp Reisner 
1961c3470cdeSLars Ellenberg 	if (!data_size)
1962fc5be839SAndreas Gruenbacher 		return 0;
1963c3470cdeSLars Ellenberg 
196469a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1965b411b363SPhilipp Reisner 
1966b411b363SPhilipp Reisner 	data = kmap(page);
1967b411b363SPhilipp Reisner 	while (data_size) {
1968fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1969fc5be839SAndreas Gruenbacher 
197069a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1971a5c31904SAndreas Gruenbacher 		if (err)
1972b411b363SPhilipp Reisner 			break;
1973a5c31904SAndreas Gruenbacher 		data_size -= len;
1974b411b363SPhilipp Reisner 	}
1975b411b363SPhilipp Reisner 	kunmap(page);
197669a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
1977fc5be839SAndreas Gruenbacher 	return err;
1978b411b363SPhilipp Reisner }
1979b411b363SPhilipp Reisner 
/* Receive a "diskless read" reply: the payload goes straight into the
 * pages of the original request's master bio instead of a peer request.
 * Verifies the peer's data integrity digest if one is configured.
 * Returns 0 on success, a negative error code otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* the digest, if any, precedes the payload on the wire */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill the bio's segments directly from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = bvec_kmap_local(&bvec);
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap_local(mapped);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
2027b411b363SPhilipp Reisner 
/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: marks the range in sync and
 * acks it on success, or records the failure and sends a negative ack.
 * Returns the result of drbd_send_ack().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* pairs with the inc_unacked() in recv_resync_read() */
	dec_unacked(device);

	return err;
}
2056b411b363SPhilipp Reisner 
/* Receive one resync data block and submit it to the local disk.
 * On success the local-disk reference taken by the caller is released
 * later in drbd_peer_request_endio; on failure we drop it here
 * (hence the __releases(local) annotation).
 * Returns 0 on success, -EIO otherwise. */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->opf = REQ_OP_WRITE;
	peer_req->submit_jif = jiffies;

	/* queue on sync_ee before submitting */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* submit failed: take it off sync_ee again and free it */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
2096b411b363SPhilipp Reisner 
2097668eebc6SAndreas Gruenbacher static struct drbd_request *
2098b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2099bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2100b411b363SPhilipp Reisner {
2101b411b363SPhilipp Reisner 	struct drbd_request *req;
2102668eebc6SAndreas Gruenbacher 
2103bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2104bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
21055e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2106668eebc6SAndreas Gruenbacher 		return req;
2107c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2108d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2109c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2110c3afd8f5SAndreas Gruenbacher 	}
2111668eebc6SAndreas Gruenbacher 	return NULL;
2112668eebc6SAndreas Gruenbacher }
2113668eebc6SAndreas Gruenbacher 
/* Handle P_DATA_REPLY: the peer answers one of our read requests with
 * the requested data.  Locate the request, receive the payload into its
 * bio, and advance its state machine.
 * Returns 0 on success, a negative error code otherwise. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* validate the peer-supplied request id under the req_lock */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
2145b411b363SPhilipp Reisner 
/* Handle P_RS_DATA_REPLY: the peer answers one of our resync read
 * requests.  If we have a local disk, write the data out via
 * recv_resync_read(); otherwise drain the payload from the socket and
 * send a negative ack.
 * Returns 0 on success, a negative error code otherwise. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		/* keep the data stream in sync even though we drop the data */
		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account received resync sectors for resync speed control */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
2180b411b363SPhilipp Reisner 
/* Re-queue local write requests that were postponed because they
 * conflicted with the peer request covering [sector, sector+size).
 * Callers in this file (see e_end_block()) hold
 * device->resource->req_lock. */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* only requests that are postponed and no longer pending
		 * on local disk are eligible for a restart */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
21997be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a peer write: send the appropriate (negative)
 * ack if the protocol requires one, resolve any write conflicts, and
 * drop our reference on the epoch.
 * Returns the result of drbd_send_ack(), or 0 if no ack was due.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during resync (SyncSource..PausedSyncT) a successful
			 * write may also mark the range in sync */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2245b411b363SPhilipp Reisner 
2246a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2247b411b363SPhilipp Reisner {
22488050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2249a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2250a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
225199920dc5SAndreas Gruenbacher 	int err;
2252b411b363SPhilipp Reisner 
2253a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2254a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2255b411b363SPhilipp Reisner 
225699920dc5SAndreas Gruenbacher 	return err;
2257b411b363SPhilipp Reisner }
2258b411b363SPhilipp Reisner 
/* Work callback: answer a conflicting peer write with P_SUPERSEDED. */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2263b6a370baSPhilipp Reisner 
226499920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22657be8da07SAndreas Gruenbacher {
2266a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2267a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2268a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
22697be8da07SAndreas Gruenbacher 
2270a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2271d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
22727be8da07SAndreas Gruenbacher }
22737be8da07SAndreas Gruenbacher 
22743e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
22753e394da1SAndreas Gruenbacher {
22763e394da1SAndreas Gruenbacher 	/*
22773e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
22783e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
22793e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
22803e394da1SAndreas Gruenbacher 	 */
22813e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
22823e394da1SAndreas Gruenbacher }
22833e394da1SAndreas Gruenbacher 
22843e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
22853e394da1SAndreas Gruenbacher {
22863e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
22873e394da1SAndreas Gruenbacher }
22883e394da1SAndreas Gruenbacher 
228969a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
22903e394da1SAndreas Gruenbacher {
229169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
22923c13b680SLars Ellenberg 	unsigned int newest_peer_seq;
22933e394da1SAndreas Gruenbacher 
229469a22773SAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2295b30ab791SAndreas Gruenbacher 		spin_lock(&device->peer_seq_lock);
2296b30ab791SAndreas Gruenbacher 		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2297b30ab791SAndreas Gruenbacher 		device->peer_seq = newest_peer_seq;
2298b30ab791SAndreas Gruenbacher 		spin_unlock(&device->peer_seq_lock);
2299b30ab791SAndreas Gruenbacher 		/* wake up only if we actually changed device->peer_seq */
23003c13b680SLars Ellenberg 		if (peer_seq == newest_peer_seq)
2301b30ab791SAndreas Gruenbacher 			wake_up(&device->seq_wait);
23023e394da1SAndreas Gruenbacher 	}
23037be8da07SAndreas Gruenbacher }
23043e394da1SAndreas Gruenbacher 
2305d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2306d93f6302SLars Ellenberg {
2307d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2308d93f6302SLars Ellenberg }
2309d93f6302SLars Ellenberg 
2310d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
2311b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2312d93f6302SLars Ellenberg {
2313d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
23147e5fec31SFabian Frederick 	bool rv = false;
2315b6a370baSPhilipp Reisner 
23160500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2317a8cd15baSAndreas Gruenbacher 	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2318d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
2319d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
23207e5fec31SFabian Frederick 			rv = true;
2321b6a370baSPhilipp Reisner 			break;
2322b6a370baSPhilipp Reisner 		}
2323b6a370baSPhilipp Reisner 	}
23240500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2325b6a370baSPhilipp Reisner 
2326b6a370baSPhilipp Reisner 	return rv;
2327b6a370baSPhilipp Reisner }
2328b6a370baSPhilipp Reisner 
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Without conflict resolution, sequence numbers are not tracked. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* All packets preceding this one were processed (or this is
		 * an old/duplicate sequence number): accept the packet and
		 * advance device->peer_seq as far as possible. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		/* prepare_to_wait() before dropping the lock, so a wake_up()
		 * from update_peer_seq() in between cannot be missed. */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2397b411b363SPhilipp Reisner 
23989945172aSBart Van Assche static enum req_op wire_flags_to_bio_op(u32 dpf)
2399bb3cc85eSMike Christie {
2400f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
240145c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2402f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2403f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2404bb3cc85eSMike Christie 	else
2405bb3cc85eSMike Christie 		return REQ_OP_WRITE;
240676d2e7ecSPhilipp Reisner }
240776d2e7ecSPhilipp Reisner 
2408ce668b6dSChristoph Böhmwalder /* see also bio_flags_to_wire() */
2409ce668b6dSChristoph Böhmwalder static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
2410ce668b6dSChristoph Böhmwalder {
2411ce668b6dSChristoph Böhmwalder 	return wire_flags_to_bio_op(dpf) |
2412ce668b6dSChristoph Böhmwalder 		(dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2413ce668b6dSChristoph Böhmwalder 		(dpf & DP_FUA ? REQ_FUA : 0) |
2414ce668b6dSChristoph Böhmwalder 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2415ce668b6dSChristoph Böhmwalder }
2416ce668b6dSChristoph Böhmwalder 
/*
 * fail_postponed_requests() - NEG_ACK every RQ_POSTPONED local request
 * overlapping [sector, sector + size).
 *
 * Called with device->resource->req_lock held.  The lock is dropped
 * around complete_master_bio() and re-acquired; because the overlap tree
 * may have changed meanwhile, the scan restarts from the top each time
 * ("goto repeat").
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		/* only consider local (application) requests */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
24417be8da07SAndreas Gruenbacher 
/*
 * handle_write_conflicts() - resolve overlaps between an incoming peer
 * write and requests already tracked in device->write_requests.
 *
 * Called by receive_Data() with device->resource->req_lock held.
 * NOTE(review): the "goto repeat" after drbd_wait_misc() suggests that
 * helper drops and re-takes the lock — confirm its locking contract.
 *
 * Returns 0 when the peer request may be submitted, -ENOENT when it was
 * instead queued on done_ee to be answered with P_SUPERSEDED /
 * P_RETRY_WRITE, or another negative error.  On error the interval is
 * removed from the tree again.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* skip ourselves and already completed intervals */
		if (i == &peer_req->i)
			continue;
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Answer the peer from the ack_sender work queue
			 * instead of writing the data. */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25497be8da07SAndreas Gruenbacher 
/* mirrored write */
/*
 * receive_Data() - handle an incoming mirrored write (P_DATA, P_TRIM,
 * P_ZEROES).
 *
 * Reads the payload into a peer request, assigns it to the current write
 * epoch, sends or schedules the acks the agreed wire protocol requires,
 * performs conflict detection in dual-primary setups, and finally submits
 * the request to the local backing device.
 * Returns 0 on success, a negative error to trigger a re-connect.
 */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* No local disk: keep the sequence numbers consistent, negatively
	 * acknowledge the write, and drain its payload from the socket. */
	if (!get_ldev(device)) {
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	peer_req->opf = wire_flags_to_bio(connection, dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* a data packet without payload can only be an empty flush */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* account this request in the current write epoch */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	/* Peers before protocol 100 do not set the ack flags themselves;
	 * derive them from the configured wire protocol. */
	if (peer_device->connection->agreed_pro_version < 100) {
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			/* -ENOENT: the request was queued to be answered with
			 * P_SUPERSEDED / P_RETRY_WRITE instead of written. */
			if (err == -ENOENT) {
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and ZEROES are processed synchronously:
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* as sync target, do not overwrite areas a resync write is
	 * concurrently targeting */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(peer_req);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	/* undo the epoch reference taken above and free the request */
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2717b411b363SPhilipp Reisner 
27180f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27190f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27200f0601f4SLars Ellenberg  *
27210f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27220f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27230f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27240f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27250f0601f4SLars Ellenberg  *
27260f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27270f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27280f0601f4SLars Ellenberg  */
2729ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2730ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2731e8299874SLars Ellenberg {
2732e8299874SLars Ellenberg 	struct lc_element *tmp;
2733ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2734e8299874SLars Ellenberg 
2735ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2736ad3fee79SLars Ellenberg 		return throttle;
2737e8299874SLars Ellenberg 
2738e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2739e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2740e8299874SLars Ellenberg 	if (tmp) {
2741e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2742e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2743e8299874SLars Ellenberg 			throttle = false;
2744ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2745ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2746e8299874SLars Ellenberg 	}
2747e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2748e8299874SLars Ellenberg 
2749e8299874SLars Ellenberg 	return throttle;
2750e8299874SLars Ellenberg }
2751e8299874SLars Ellenberg 
2752e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27530f0601f4SLars Ellenberg {
27548c40c7c4SChristoph Hellwig 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27550f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2756daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2757e8299874SLars Ellenberg 	int curr_events;
2758daeda1ccSPhilipp Reisner 
2759daeda1ccSPhilipp Reisner 	rcu_read_lock();
2760b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2761daeda1ccSPhilipp Reisner 	rcu_read_unlock();
27620f0601f4SLars Ellenberg 
27630f0601f4SLars Ellenberg 	/* feature disabled? */
2764daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2765e8299874SLars Ellenberg 		return false;
2766e3555d85SPhilipp Reisner 
27678446fe92SChristoph Hellwig 	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2768b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2769ad3fee79SLars Ellenberg 
2770ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2771ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
27720f0601f4SLars Ellenberg 		unsigned long rs_left;
27730f0601f4SLars Ellenberg 		int i;
27740f0601f4SLars Ellenberg 
2775b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
27760f0601f4SLars Ellenberg 
27770f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
27780f0601f4SLars Ellenberg 		 * approx. */
2779b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
27802649f080SLars Ellenberg 
2781b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2782b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
27832649f080SLars Ellenberg 		else
2784b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
27850f0601f4SLars Ellenberg 
2786b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
27870f0601f4SLars Ellenberg 		if (!dt)
27880f0601f4SLars Ellenberg 			dt++;
2789b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
27900f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
27910f0601f4SLars Ellenberg 
2792daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2793e8299874SLars Ellenberg 			return true;
27940f0601f4SLars Ellenberg 	}
2795e8299874SLars Ellenberg 	return false;
27960f0601f4SLars Ellenberg }
27970f0601f4SLars Ellenberg 
2798bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
2799b411b363SPhilipp Reisner {
28009f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2801b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2802b411b363SPhilipp Reisner 	sector_t sector;
28034a76b161SAndreas Gruenbacher 	sector_t capacity;
2804db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2805b411b363SPhilipp Reisner 	struct digest_info *di = NULL;
2806b18b37beSPhilipp Reisner 	int size, verb;
2807e658983aSAndreas Gruenbacher 	struct p_block_req *p =	pi->data;
28084a76b161SAndreas Gruenbacher 
28099f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
28109f4fe9adSAndreas Gruenbacher 	if (!peer_device)
28114a76b161SAndreas Gruenbacher 		return -EIO;
28129f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2813155bd9d1SChristoph Hellwig 	capacity = get_capacity(device->vdisk);
2814b411b363SPhilipp Reisner 
2815b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2816b411b363SPhilipp Reisner 	size   = be32_to_cpu(p->blksize);
2817b411b363SPhilipp Reisner 
2818c670a398SAndreas Gruenbacher 	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2819d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2820b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
282182bc0194SAndreas Gruenbacher 		return -EINVAL;
2822b411b363SPhilipp Reisner 	}
2823b411b363SPhilipp Reisner 	if (sector + (size>>9) > capacity) {
2824d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2825b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
282682bc0194SAndreas Gruenbacher 		return -EINVAL;
2827b411b363SPhilipp Reisner 	}
2828b411b363SPhilipp Reisner 
2829b30ab791SAndreas Gruenbacher 	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
2830b18b37beSPhilipp Reisner 		verb = 1;
2831e2857216SAndreas Gruenbacher 		switch (pi->cmd) {
2832b18b37beSPhilipp Reisner 		case P_DATA_REQUEST:
283369a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2834b18b37beSPhilipp Reisner 			break;
2835700ca8c0SPhilipp Reisner 		case P_RS_THIN_REQ:
2836b18b37beSPhilipp Reisner 		case P_RS_DATA_REQUEST:
2837b18b37beSPhilipp Reisner 		case P_CSUM_RS_REQUEST:
2838b18b37beSPhilipp Reisner 		case P_OV_REQUEST:
283969a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
2840b18b37beSPhilipp Reisner 			break;
2841b18b37beSPhilipp Reisner 		case P_OV_REPLY:
2842b18b37beSPhilipp Reisner 			verb = 0;
2843b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
284469a22773SAndreas Gruenbacher 			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2845b18b37beSPhilipp Reisner 			break;
2846b18b37beSPhilipp Reisner 		default:
284749ba9b1bSAndreas Gruenbacher 			BUG();
2848b18b37beSPhilipp Reisner 		}
2849b18b37beSPhilipp Reisner 		if (verb && __ratelimit(&drbd_ratelimit_state))
2850d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not satisfy peer's read request, "
2851b411b363SPhilipp Reisner 			    "no local data.\n");
2852b18b37beSPhilipp Reisner 
2853a821cc4aSLars Ellenberg 		/* drain possibly payload */
285469a22773SAndreas Gruenbacher 		return drbd_drain_block(peer_device, pi->size);
2855b411b363SPhilipp Reisner 	}
2856b411b363SPhilipp Reisner 
2857b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2858b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
2859b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
2860a0fb3c47SLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
28619104d31aSLars Ellenberg 			size, GFP_NOIO);
2862db830c46SAndreas Gruenbacher 	if (!peer_req) {
2863b30ab791SAndreas Gruenbacher 		put_ldev(device);
286482bc0194SAndreas Gruenbacher 		return -ENOMEM;
2865b411b363SPhilipp Reisner 	}
2866ce668b6dSChristoph Böhmwalder 	peer_req->opf = REQ_OP_READ;
2867b411b363SPhilipp Reisner 
2868e2857216SAndreas Gruenbacher 	switch (pi->cmd) {
2869b411b363SPhilipp Reisner 	case P_DATA_REQUEST:
2870a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_data_req;
287180a40e43SLars Ellenberg 		/* application IO, don't drbd_rs_begin_io */
287221ae5d7fSLars Ellenberg 		peer_req->flags |= EE_APPLICATION;
287380a40e43SLars Ellenberg 		goto submit;
287480a40e43SLars Ellenberg 
2875700ca8c0SPhilipp Reisner 	case P_RS_THIN_REQ:
2876700ca8c0SPhilipp Reisner 		/* If at some point in the future we have a smart way to
2877700ca8c0SPhilipp Reisner 		   find out if this data block is completely deallocated,
2878700ca8c0SPhilipp Reisner 		   then we would do something smarter here than reading
2879700ca8c0SPhilipp Reisner 		   the block... */
2880700ca8c0SPhilipp Reisner 		peer_req->flags |= EE_RS_THIN_REQ;
2881df561f66SGustavo A. R. Silva 		fallthrough;
2882b411b363SPhilipp Reisner 	case P_RS_DATA_REQUEST:
2883a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_rsdata_req;
28845f9915bbSLars Ellenberg 		/* used in the sector offset progress display */
2885b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2886b411b363SPhilipp Reisner 		break;
2887b411b363SPhilipp Reisner 
2888b411b363SPhilipp Reisner 	case P_OV_REPLY:
2889b411b363SPhilipp Reisner 	case P_CSUM_RS_REQUEST:
2890e2857216SAndreas Gruenbacher 		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2891b411b363SPhilipp Reisner 		if (!di)
2892b411b363SPhilipp Reisner 			goto out_free_e;
2893b411b363SPhilipp Reisner 
2894e2857216SAndreas Gruenbacher 		di->digest_size = pi->size;
2895b411b363SPhilipp Reisner 		di->digest = (((char *)di)+sizeof(struct digest_info));
2896b411b363SPhilipp Reisner 
2897db830c46SAndreas Gruenbacher 		peer_req->digest = di;
2898db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_HAS_DIGEST;
2899c36c3cedSLars Ellenberg 
29009f4fe9adSAndreas Gruenbacher 		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2901b411b363SPhilipp Reisner 			goto out_free_e;
2902b411b363SPhilipp Reisner 
2903e2857216SAndreas Gruenbacher 		if (pi->cmd == P_CSUM_RS_REQUEST) {
29049f4fe9adSAndreas Gruenbacher 			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2905a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_csum_rs_req;
29065f9915bbSLars Ellenberg 			/* used in the sector offset progress display */
2907b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2908aaaba345SLars Ellenberg 			/* remember to report stats in drbd_resync_finished */
2909aaaba345SLars Ellenberg 			device->use_csums = true;
2910e2857216SAndreas Gruenbacher 		} else if (pi->cmd == P_OV_REPLY) {
29112649f080SLars Ellenberg 			/* track progress, we may need to throttle */
2912b30ab791SAndreas Gruenbacher 			atomic_add(size >> 9, &device->rs_sect_in);
2913a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_ov_reply;
2914b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
29150f0601f4SLars Ellenberg 			/* drbd_rs_begin_io done when we sent this request,
29160f0601f4SLars Ellenberg 			 * but accounting still needs to be done. */
29170f0601f4SLars Ellenberg 			goto submit_for_resync;
2918b411b363SPhilipp Reisner 		}
2919b411b363SPhilipp Reisner 		break;
2920b411b363SPhilipp Reisner 
2921b411b363SPhilipp Reisner 	case P_OV_REQUEST:
2922b30ab791SAndreas Gruenbacher 		if (device->ov_start_sector == ~(sector_t)0 &&
29239f4fe9adSAndreas Gruenbacher 		    peer_device->connection->agreed_pro_version >= 90) {
2924de228bbaSLars Ellenberg 			unsigned long now = jiffies;
2925de228bbaSLars Ellenberg 			int i;
2926b30ab791SAndreas Gruenbacher 			device->ov_start_sector = sector;
2927b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
2928b30ab791SAndreas Gruenbacher 			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2929b30ab791SAndreas Gruenbacher 			device->rs_total = device->ov_left;
2930de228bbaSLars Ellenberg 			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2931b30ab791SAndreas Gruenbacher 				device->rs_mark_left[i] = device->ov_left;
2932b30ab791SAndreas Gruenbacher 				device->rs_mark_time[i] = now;
2933de228bbaSLars Ellenberg 			}
2934d0180171SAndreas Gruenbacher 			drbd_info(device, "Online Verify start sector: %llu\n",
2935b411b363SPhilipp Reisner 					(unsigned long long)sector);
2936b411b363SPhilipp Reisner 		}
2937a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_ov_req;
2938b411b363SPhilipp Reisner 		break;
2939b411b363SPhilipp Reisner 
2940b411b363SPhilipp Reisner 	default:
294149ba9b1bSAndreas Gruenbacher 		BUG();
2942b411b363SPhilipp Reisner 	}
2943b411b363SPhilipp Reisner 
29440f0601f4SLars Ellenberg 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
29450f0601f4SLars Ellenberg 	 * wrt the receiver, but it is not as straightforward as it may seem.
29460f0601f4SLars Ellenberg 	 * Various places in the resync start and stop logic assume resync
29470f0601f4SLars Ellenberg 	 * requests are processed in order, requeuing this on the worker thread
29480f0601f4SLars Ellenberg 	 * introduces a bunch of new code for synchronization between threads.
29490f0601f4SLars Ellenberg 	 *
29500f0601f4SLars Ellenberg 	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
29510f0601f4SLars Ellenberg 	 * "forever", throttling after drbd_rs_begin_io will lock that extent
29520f0601f4SLars Ellenberg 	 * for application writes for the same time.  For now, just throttle
29530f0601f4SLars Ellenberg 	 * here, where the rest of the code expects the receiver to sleep for
29540f0601f4SLars Ellenberg 	 * a while, anyways.
29550f0601f4SLars Ellenberg 	 */
2956b411b363SPhilipp Reisner 
29570f0601f4SLars Ellenberg 	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
29580f0601f4SLars Ellenberg 	 * this defers syncer requests for some time, before letting at least
29590f0601f4SLars Ellenberg 	 * on request through.  The resync controller on the receiving side
29600f0601f4SLars Ellenberg 	 * will adapt to the incoming rate accordingly.
29610f0601f4SLars Ellenberg 	 *
29620f0601f4SLars Ellenberg 	 * We cannot throttle here if remote is Primary/SyncTarget:
29630f0601f4SLars Ellenberg 	 * we would also throttle its application reads.
29640f0601f4SLars Ellenberg 	 * In that case, throttling is done on the SyncTarget only.
29650f0601f4SLars Ellenberg 	 */
2966c5a2c150SLars Ellenberg 
2967c5a2c150SLars Ellenberg 	/* Even though this may be a resync request, we do add to "read_ee";
2968c5a2c150SLars Ellenberg 	 * "sync_ee" is only used for resync WRITEs.
2969c5a2c150SLars Ellenberg 	 * Add to list early, so debugfs can find this request
2970c5a2c150SLars Ellenberg 	 * even if we have to sleep below. */
2971c5a2c150SLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
2972c5a2c150SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
2973c5a2c150SLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
2974c5a2c150SLars Ellenberg 
2975944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
2976ad3fee79SLars Ellenberg 	if (device->state.peer != R_PRIMARY
2977ad3fee79SLars Ellenberg 	&& drbd_rs_should_slow_down(device, sector, false))
2978e3555d85SPhilipp Reisner 		schedule_timeout_uninterruptible(HZ/10);
2979944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_begin_io);
2980b30ab791SAndreas Gruenbacher 	if (drbd_rs_begin_io(device, sector))
298180a40e43SLars Ellenberg 		goto out_free_e;
2982b411b363SPhilipp Reisner 
29830f0601f4SLars Ellenberg submit_for_resync:
2984b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
29850f0601f4SLars Ellenberg 
298680a40e43SLars Ellenberg submit:
2987944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_submit_peer_request);
2988b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2989ce668b6dSChristoph Böhmwalder 	if (drbd_submit_peer_request(peer_req) == 0)
299082bc0194SAndreas Gruenbacher 		return 0;
2991b411b363SPhilipp Reisner 
299210f6d992SLars Ellenberg 	/* don't care for the reason here */
2993d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
2994c5a2c150SLars Ellenberg 
2995c5a2c150SLars Ellenberg out_free_e:
29960500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2997a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
29980500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
299922cc37a9SLars Ellenberg 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
300022cc37a9SLars Ellenberg 
3001b30ab791SAndreas Gruenbacher 	put_ldev(device);
3002b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
300382bc0194SAndreas Gruenbacher 	return -EIO;
3004b411b363SPhilipp Reisner }
3005b411b363SPhilipp Reisner 
30069b48ff07SLee Jones /*
300769a22773SAndreas Gruenbacher  * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
300869a22773SAndreas Gruenbacher  */
300969a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
3010b411b363SPhilipp Reisner {
301169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3012b411b363SPhilipp Reisner 	int self, peer, rv = -100;
3013b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
301444ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
3015b411b363SPhilipp Reisner 
3016b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & 1;
3017b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & 1;
3018b411b363SPhilipp Reisner 
3019b30ab791SAndreas Gruenbacher 	ch_peer = device->p_uuid[UI_SIZE];
3020b30ab791SAndreas Gruenbacher 	ch_self = device->comm_bm_set;
3021b411b363SPhilipp Reisner 
302244ed167dSPhilipp Reisner 	rcu_read_lock();
302369a22773SAndreas Gruenbacher 	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
302444ed167dSPhilipp Reisner 	rcu_read_unlock();
302544ed167dSPhilipp Reisner 	switch (after_sb_0p) {
3026b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3027b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3028b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
302944ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
3030d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3031b411b363SPhilipp Reisner 		break;
3032b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3033b411b363SPhilipp Reisner 		break;
3034b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3035b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3036b411b363SPhilipp Reisner 			rv = -1;
3037b411b363SPhilipp Reisner 			break;
3038b411b363SPhilipp Reisner 		}
3039b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3040b411b363SPhilipp Reisner 			rv =  1;
3041b411b363SPhilipp Reisner 			break;
3042b411b363SPhilipp Reisner 		}
3043df561f66SGustavo A. R. Silva 		fallthrough;	/* to one of the other strategies */
3044b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3045b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3046b411b363SPhilipp Reisner 			rv = 1;
3047b411b363SPhilipp Reisner 			break;
3048b411b363SPhilipp Reisner 		}
3049b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3050b411b363SPhilipp Reisner 			rv = -1;
3051b411b363SPhilipp Reisner 			break;
3052b411b363SPhilipp Reisner 		}
3053b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
3054d0180171SAndreas Gruenbacher 		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
3055b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
3056df561f66SGustavo A. R. Silva 		fallthrough;
3057b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3058b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
305969a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3060b411b363SPhilipp Reisner 				? -1 : 1;
3061b411b363SPhilipp Reisner 			break;
3062b411b363SPhilipp Reisner 		} else {
3063b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
3064b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
3065b411b363SPhilipp Reisner 		}
306644ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
3067b411b363SPhilipp Reisner 			break;
3068df561f66SGustavo A. R. Silva 		fallthrough;
3069b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3070b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
3071b411b363SPhilipp Reisner 			rv = -1;
3072b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
3073b411b363SPhilipp Reisner 			rv =  1;
3074b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
3075b411b363SPhilipp Reisner 		     /* Well, then use something else. */
307669a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3077b411b363SPhilipp Reisner 				? -1 : 1;
3078b411b363SPhilipp Reisner 		break;
3079b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3080b411b363SPhilipp Reisner 		rv = -1;
3081b411b363SPhilipp Reisner 		break;
3082b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3083b411b363SPhilipp Reisner 		rv =  1;
3084b411b363SPhilipp Reisner 	}
3085b411b363SPhilipp Reisner 
3086b411b363SPhilipp Reisner 	return rv;
3087b411b363SPhilipp Reisner }
3088b411b363SPhilipp Reisner 
30899b48ff07SLee Jones /*
309069a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
309169a22773SAndreas Gruenbacher  */
309269a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3093b411b363SPhilipp Reisner {
309469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
30956184ea21SAndreas Gruenbacher 	int hg, rv = -100;
309644ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3097b411b363SPhilipp Reisner 
309844ed167dSPhilipp Reisner 	rcu_read_lock();
309969a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
310044ed167dSPhilipp Reisner 	rcu_read_unlock();
310144ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3102b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3103b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3104b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3105b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3106b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
310744ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3108d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3109b411b363SPhilipp Reisner 		break;
3110b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3111b411b363SPhilipp Reisner 		break;
3112b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
311369a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3114b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3115b411b363SPhilipp Reisner 			rv = hg;
3116b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3117b411b363SPhilipp Reisner 			rv = hg;
3118b411b363SPhilipp Reisner 		break;
3119b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
312069a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3121b411b363SPhilipp Reisner 		break;
3122b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3123b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3124b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
312569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3126b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3127bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3128bb437946SAndreas Gruenbacher 
3129b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3130b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3131b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3132b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3133bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3134b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3135b411b363SPhilipp Reisner 			} else {
3136d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3137b411b363SPhilipp Reisner 				rv = hg;
3138b411b363SPhilipp Reisner 			}
3139b411b363SPhilipp Reisner 		} else
3140b411b363SPhilipp Reisner 			rv = hg;
3141b411b363SPhilipp Reisner 	}
3142b411b363SPhilipp Reisner 
3143b411b363SPhilipp Reisner 	return rv;
3144b411b363SPhilipp Reisner }
3145b411b363SPhilipp Reisner 
31469b48ff07SLee Jones /*
314769a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
314869a22773SAndreas Gruenbacher  */
314969a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3150b411b363SPhilipp Reisner {
315169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31526184ea21SAndreas Gruenbacher 	int hg, rv = -100;
315344ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3154b411b363SPhilipp Reisner 
315544ed167dSPhilipp Reisner 	rcu_read_lock();
315669a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
315744ed167dSPhilipp Reisner 	rcu_read_unlock();
315844ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3159b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3160b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3161b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3162b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3163b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3164b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3165b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
316644ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3167d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3168b411b363SPhilipp Reisner 		break;
3169b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
317069a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3171b411b363SPhilipp Reisner 		break;
3172b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3173b411b363SPhilipp Reisner 		break;
3174b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
317569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3176b411b363SPhilipp Reisner 		if (hg == -1) {
3177bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3178bb437946SAndreas Gruenbacher 
3179b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3180b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3181b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3182b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3183bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3184b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3185b411b363SPhilipp Reisner 			} else {
3186d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3187b411b363SPhilipp Reisner 				rv = hg;
3188b411b363SPhilipp Reisner 			}
3189b411b363SPhilipp Reisner 		} else
3190b411b363SPhilipp Reisner 			rv = hg;
3191b411b363SPhilipp Reisner 	}
3192b411b363SPhilipp Reisner 
3193b411b363SPhilipp Reisner 	return rv;
3194b411b363SPhilipp Reisner }
3195b411b363SPhilipp Reisner 
3196b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3197b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3198b411b363SPhilipp Reisner {
3199b411b363SPhilipp Reisner 	if (!uuid) {
3200d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3201b411b363SPhilipp Reisner 		return;
3202b411b363SPhilipp Reisner 	}
3203d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3204b411b363SPhilipp Reisner 	     text,
3205b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3206b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3207b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3208b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3209b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3210b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3211b411b363SPhilipp Reisner }
3212b411b363SPhilipp Reisner 
3213b411b363SPhilipp Reisner /*
3214b411b363SPhilipp Reisner   100	after split brain try auto recover
3215b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3216b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3217b411b363SPhilipp Reisner     0	no Sync
3218b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3219b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3220b411b363SPhilipp Reisner  -100	after split brain, disconnect
3221b411b363SPhilipp Reisner -1000	unrelated data
32224a23f264SPhilipp Reisner -1091   requires proto 91
32234a23f264SPhilipp Reisner -1096   requires proto 96
3224b411b363SPhilipp Reisner  */
3225f2d3d75bSLars Ellenberg 
3226f2d3d75bSLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3227b411b363SPhilipp Reisner {
322844a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
322944a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
3230b411b363SPhilipp Reisner 	u64 self, peer;
3231b411b363SPhilipp Reisner 	int i, j;
3232b411b363SPhilipp Reisner 
3233b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3234b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3235b411b363SPhilipp Reisner 
3236b411b363SPhilipp Reisner 	*rule_nr = 10;
3237b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3238b411b363SPhilipp Reisner 		return 0;
3239b411b363SPhilipp Reisner 
3240b411b363SPhilipp Reisner 	*rule_nr = 20;
3241b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3242b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
3243b411b363SPhilipp Reisner 		return -2;
3244b411b363SPhilipp Reisner 
3245b411b363SPhilipp Reisner 	*rule_nr = 30;
3246b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
3247b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
3248b411b363SPhilipp Reisner 		return 2;
3249b411b363SPhilipp Reisner 
3250b411b363SPhilipp Reisner 	if (self == peer) {
3251b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
3252b411b363SPhilipp Reisner 
3253b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3254b411b363SPhilipp Reisner 
325544a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32564a23f264SPhilipp Reisner 				return -1091;
3257b411b363SPhilipp Reisner 
3258b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3259b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3260d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3261b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3262b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3263b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3264b411b363SPhilipp Reisner 
3265b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3266b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3267b411b363SPhilipp Reisner 				*rule_nr = 34;
3268b411b363SPhilipp Reisner 			} else {
3269d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3270b411b363SPhilipp Reisner 				*rule_nr = 36;
3271b411b363SPhilipp Reisner 			}
3272b411b363SPhilipp Reisner 
3273b411b363SPhilipp Reisner 			return 1;
3274b411b363SPhilipp Reisner 		}
3275b411b363SPhilipp Reisner 
3276b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3277b411b363SPhilipp Reisner 
327844a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32794a23f264SPhilipp Reisner 				return -1091;
3280b411b363SPhilipp Reisner 
3281b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3282b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3283d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3284b411b363SPhilipp Reisner 
3285b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3286b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3287b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3288b411b363SPhilipp Reisner 
3289b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3290b411b363SPhilipp Reisner 				*rule_nr = 35;
3291b411b363SPhilipp Reisner 			} else {
3292d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3293b411b363SPhilipp Reisner 				*rule_nr = 37;
3294b411b363SPhilipp Reisner 			}
3295b411b363SPhilipp Reisner 
3296b411b363SPhilipp Reisner 			return -1;
3297b411b363SPhilipp Reisner 		}
3298b411b363SPhilipp Reisner 
3299b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3300b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3301b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3302b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3303b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3304b411b363SPhilipp Reisner 		*rule_nr = 40;
3305b411b363SPhilipp Reisner 
3306f2d3d75bSLars Ellenberg 		/* Neither has the "crashed primary" flag set,
3307f2d3d75bSLars Ellenberg 		 * only a replication link hiccup. */
3308f2d3d75bSLars Ellenberg 		if (rct == 0)
3309f2d3d75bSLars Ellenberg 			return 0;
3310f2d3d75bSLars Ellenberg 
3311f2d3d75bSLars Ellenberg 		/* Current UUID equal and no bitmap uuid; does not necessarily
3312f2d3d75bSLars Ellenberg 		 * mean this was a "simultaneous hard crash", maybe IO was
3313f2d3d75bSLars Ellenberg 		 * frozen, so no UUID-bump happened.
3314f2d3d75bSLars Ellenberg 		 * This is a protocol change, overload DRBD_FF_WSAME as flag
3315f2d3d75bSLars Ellenberg 		 * for "new-enough" peer DRBD version. */
3316f2d3d75bSLars Ellenberg 		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3317f2d3d75bSLars Ellenberg 			*rule_nr = 41;
3318f2d3d75bSLars Ellenberg 			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3319f2d3d75bSLars Ellenberg 				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3320f2d3d75bSLars Ellenberg 				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3321f2d3d75bSLars Ellenberg 			}
3322f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3323f2d3d75bSLars Ellenberg 				/* At least one has the "crashed primary" bit set,
3324f2d3d75bSLars Ellenberg 				 * both are primary now, but neither has rotated its UUIDs?
3325f2d3d75bSLars Ellenberg 				 * "Can not happen." */
3326f2d3d75bSLars Ellenberg 				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3327f2d3d75bSLars Ellenberg 				return -100;
3328f2d3d75bSLars Ellenberg 			}
3329f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY)
3330f2d3d75bSLars Ellenberg 				return 1;
3331f2d3d75bSLars Ellenberg 			return -1;
3332f2d3d75bSLars Ellenberg 		}
3333f2d3d75bSLars Ellenberg 
3334f2d3d75bSLars Ellenberg 		/* Both are secondary.
3335f2d3d75bSLars Ellenberg 		 * Really looks like recovery from simultaneous hard crash.
3336f2d3d75bSLars Ellenberg 		 * Check which had been primary before, and arbitrate. */
3337b411b363SPhilipp Reisner 		switch (rct) {
3338f2d3d75bSLars Ellenberg 		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3339b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3340b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3341b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
334244a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3343b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3344b411b363SPhilipp Reisner 		}
3345b411b363SPhilipp Reisner 	}
3346b411b363SPhilipp Reisner 
3347b411b363SPhilipp Reisner 	*rule_nr = 50;
3348b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3349b411b363SPhilipp Reisner 	if (self == peer)
3350b411b363SPhilipp Reisner 		return -1;
3351b411b363SPhilipp Reisner 
3352b411b363SPhilipp Reisner 	*rule_nr = 51;
3353b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3354b411b363SPhilipp Reisner 	if (self == peer) {
335544a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3356b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3357b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3358b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3359b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get through. Undo the last start of
3360b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3361b411b363SPhilipp Reisner 
336244a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33634a23f264SPhilipp Reisner 				return -1091;
3364b411b363SPhilipp Reisner 
3365b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3366b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
33674a23f264SPhilipp Reisner 
3368d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3369b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
33704a23f264SPhilipp Reisner 
3371b411b363SPhilipp Reisner 			return -1;
3372b411b363SPhilipp Reisner 		}
3373b411b363SPhilipp Reisner 	}
3374b411b363SPhilipp Reisner 
3375b411b363SPhilipp Reisner 	*rule_nr = 60;
3376b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3377b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3378b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3379b411b363SPhilipp Reisner 		if (self == peer)
3380b411b363SPhilipp Reisner 			return -2;
3381b411b363SPhilipp Reisner 	}
3382b411b363SPhilipp Reisner 
3383b411b363SPhilipp Reisner 	*rule_nr = 70;
3384b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3385b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3386b411b363SPhilipp Reisner 	if (self == peer)
3387b411b363SPhilipp Reisner 		return 1;
3388b411b363SPhilipp Reisner 
3389b411b363SPhilipp Reisner 	*rule_nr = 71;
3390b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3391b411b363SPhilipp Reisner 	if (self == peer) {
339244a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3393b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3394b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3395b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3396b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get through. Undo the last start of
3397b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3398b411b363SPhilipp Reisner 
339944a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
34004a23f264SPhilipp Reisner 				return -1091;
3401b411b363SPhilipp Reisner 
3402b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3403b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3404b411b363SPhilipp Reisner 
3405d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3406b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3407b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3408b411b363SPhilipp Reisner 
3409b411b363SPhilipp Reisner 			return 1;
3410b411b363SPhilipp Reisner 		}
3411b411b363SPhilipp Reisner 	}
3412b411b363SPhilipp Reisner 
3413b411b363SPhilipp Reisner 
3414b411b363SPhilipp Reisner 	*rule_nr = 80;
3415b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3416b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3417b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3418b411b363SPhilipp Reisner 		if (self == peer)
3419b411b363SPhilipp Reisner 			return 2;
3420b411b363SPhilipp Reisner 	}
3421b411b363SPhilipp Reisner 
3422b411b363SPhilipp Reisner 	*rule_nr = 90;
3423b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3424b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3425b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3426b411b363SPhilipp Reisner 		return 100;
3427b411b363SPhilipp Reisner 
3428b411b363SPhilipp Reisner 	*rule_nr = 100;
3429b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3430b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3431b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3432b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3433b411b363SPhilipp Reisner 			if (self == peer)
3434b411b363SPhilipp Reisner 				return -100;
3435b411b363SPhilipp Reisner 		}
3436b411b363SPhilipp Reisner 	}
3437b411b363SPhilipp Reisner 
3438b411b363SPhilipp Reisner 	return -1000;
3439b411b363SPhilipp Reisner }
3440b411b363SPhilipp Reisner 
3441b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3442b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3443b411b363SPhilipp Reisner  */
344469a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
344569a22773SAndreas Gruenbacher 					   enum drbd_role peer_role,
3446b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
3447b411b363SPhilipp Reisner {
344869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3449b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
3450b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
345144ed167dSPhilipp Reisner 	struct net_conf *nc;
3452d29e89e3SRoland Kammerer 	int hg, rule_nr, rr_conflict, tentative, always_asbp;
3453b411b363SPhilipp Reisner 
	/* While attaching, the effective disk state is the one we are about
	 * to enter (new_state_tmp), not the transient D_NEGOTIATING. */
3454b30ab791SAndreas Gruenbacher 	mydisk = device->state.disk;
3455b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
3456b30ab791SAndreas Gruenbacher 		mydisk = device->new_state_tmp.disk;
3457b411b363SPhilipp Reisner 
3458d0180171SAndreas Gruenbacher 	drbd_info(device, "drbd_sync_handshake:\n");
34599f2247bbSPhilipp Reisner 
	/* Hold md.uuid_lock so both UUID sets are dumped and compared as a
	 * consistent snapshot. */
3460b30ab791SAndreas Gruenbacher 	spin_lock_irq(&device->ldev->md.uuid_lock);
3461b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3462b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "peer", device->p_uuid,
3463b30ab791SAndreas Gruenbacher 		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3464b411b363SPhilipp Reisner 
3465f2d3d75bSLars Ellenberg 	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
3466b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->ldev->md.uuid_lock);
3467b411b363SPhilipp Reisner 
3468d0180171SAndreas Gruenbacher 	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3469b411b363SPhilipp Reisner 
	/* hg verdict (as handled below): > 0 we become sync source, < 0 sync
	 * target, 0 no resync; |hg| >= 2 forces a full sync; |hg| == 100 is
	 * split brain; -1000 means unrelated data; values below -1000 encode
	 * a protocol (and, below -0x10000, feature-flag) requirement. */
3470b411b363SPhilipp Reisner 	if (hg == -1000) {
3471d0180171SAndreas Gruenbacher 		drbd_alert(device, "Unrelated data, aborting!\n");
3472b411b363SPhilipp Reisner 		return C_MASK;
3473b411b363SPhilipp Reisner 	}
3474f2d3d75bSLars Ellenberg 	if (hg < -0x10000) {
3475f2d3d75bSLars Ellenberg 		int proto, fflags;
3476f2d3d75bSLars Ellenberg 		hg = -hg;
3477f2d3d75bSLars Ellenberg 		proto = hg & 0xff;
3478f2d3d75bSLars Ellenberg 		fflags = (hg >> 8) & 0xff;
3479f2d3d75bSLars Ellenberg 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3480f2d3d75bSLars Ellenberg 					proto, fflags);
3481f2d3d75bSLars Ellenberg 		return C_MASK;
3482f2d3d75bSLars Ellenberg 	}
34834a23f264SPhilipp Reisner 	if (hg < -1000) {
3484d0180171SAndreas Gruenbacher 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3485b411b363SPhilipp Reisner 		return C_MASK;
3486b411b363SPhilipp Reisner 	}
3487b411b363SPhilipp Reisner 
	/* If exactly one side is Inconsistent, the disk states alone decide
	 * the direction, overriding the UUID verdict; a previous split-brain
	 * or full-sync verdict (|hg| == 2 or -100) keeps the full sync. */
3488b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3489b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
3490b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
3491b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
3492b411b363SPhilipp Reisner 		if (f)
3493b411b363SPhilipp Reisner 			hg = hg*2;
3494d0180171SAndreas Gruenbacher 		drbd_info(device, "Becoming sync %s due to disk states.\n",
3495b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
3496b411b363SPhilipp Reisner 	}
3497b411b363SPhilipp Reisner 
	/* On any split brain, let the initial-split-brain helper script run
	 * before we try to resolve it ourselves. */
34983a11a487SAdam Gandelman 	if (abs(hg) == 100)
3499b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "initial-split-brain");
35003a11a487SAdam Gandelman 
	/* Snapshot the relevant net_conf knobs under RCU. */
350144ed167dSPhilipp Reisner 	rcu_read_lock();
350269a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
3503d29e89e3SRoland Kammerer 	always_asbp = nc->always_asbp;
3504d29e89e3SRoland Kammerer 	rr_conflict = nc->rr_conflict;
3505d29e89e3SRoland Kammerer 	tentative = nc->tentative;
3506d29e89e3SRoland Kammerer 	rcu_read_unlock();
350744ed167dSPhilipp Reisner 
	/* Automatic split-brain recovery: pick the configured policy that
	 * matches the number of current primaries (0, 1 or 2).  "forced"
	 * (hg == -100) means the UUIDs were ambiguous, so any automatic
	 * resolution is upgraded to a full sync. */
3508d29e89e3SRoland Kammerer 	if (hg == 100 || (hg == -100 && always_asbp)) {
3509b30ab791SAndreas Gruenbacher 		int pcount = (device->state.role == R_PRIMARY)
3510b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
3511b411b363SPhilipp Reisner 		int forced = (hg == -100);
3512b411b363SPhilipp Reisner 
3513b411b363SPhilipp Reisner 		switch (pcount) {
3514b411b363SPhilipp Reisner 		case 0:
351569a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_0p(peer_device);
3516b411b363SPhilipp Reisner 			break;
3517b411b363SPhilipp Reisner 		case 1:
351869a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_1p(peer_device);
3519b411b363SPhilipp Reisner 			break;
3520b411b363SPhilipp Reisner 		case 2:
352169a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_2p(peer_device);
3522b411b363SPhilipp Reisner 			break;
3523b411b363SPhilipp Reisner 		}
3524b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
3525d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, %d primaries, "
3526b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3527b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3528b411b363SPhilipp Reisner 			if (forced) {
				/* NOTE(review): message typo — "where" should
				 * read "were"; changing it would alter logged
				 * output, so it is only flagged here. */
3529d0180171SAndreas Gruenbacher 				drbd_warn(device, "Doing a full sync, since"
3530b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3531b411b363SPhilipp Reisner 				hg = hg*2;
3532b411b363SPhilipp Reisner 			}
3533b411b363SPhilipp Reisner 		}
3534b411b363SPhilipp Reisner 	}
3535b411b363SPhilipp Reisner 
	/* Still split brain: honor a manual discard-my-data setting, but
	 * only if exactly one side has it set (UI_FLAGS bit 0 carries the
	 * peer's setting). */
3536b411b363SPhilipp Reisner 	if (hg == -100) {
3537b30ab791SAndreas Gruenbacher 		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3538b411b363SPhilipp Reisner 			hg = -1;
3539b30ab791SAndreas Gruenbacher 		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3540b411b363SPhilipp Reisner 			hg = 1;
3541b411b363SPhilipp Reisner 
3542b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3543d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, manually solved. "
3544b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3545b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3546b411b363SPhilipp Reisner 	}
3547b411b363SPhilipp Reisner 
	/* Unresolved split brain: run the split-brain handler and drop the
	 * connection. */
3548b411b363SPhilipp Reisner 	if (hg == -100) {
3549580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3550580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3551580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3552580b9767SLars Ellenberg 		 * to that disk, in a way... */
3553d0180171SAndreas Gruenbacher 		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3554b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "split-brain");
3555b411b363SPhilipp Reisner 		return C_MASK;
3556b411b363SPhilipp Reisner 	}
3557b411b363SPhilipp Reisner 
	/* We cannot serve as sync source without usable data. */
3558b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3559d0180171SAndreas Gruenbacher 		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3560b411b363SPhilipp Reisner 		return C_MASK;
3561b411b363SPhilipp Reisner 	}
3562b411b363SPhilipp Reisner 
	/* Becoming sync target while we are primary with usable data is only
	 * allowed if the rr-conflict policy says so. */
3563b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3564b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
356544ed167dSPhilipp Reisner 		switch (rr_conflict) {
3566b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3567b30ab791SAndreas Gruenbacher 			drbd_khelper(device, "pri-lost");
3568df561f66SGustavo A. R. Silva 			fallthrough;
3569b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3570d0180171SAndreas Gruenbacher 			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3571b411b363SPhilipp Reisner 			return C_MASK;
3572b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
			/* NOTE(review): the split string literal below lacks a
			 * space between its halves and logs
			 * "stable-dataassumption"; fixing it would change the
			 * emitted message, so it is only flagged here. */
3573d0180171SAndreas Gruenbacher 			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3574b411b363SPhilipp Reisner 			     "assumption\n");
3575b411b363SPhilipp Reisner 		}
3576b411b363SPhilipp Reisner 	}
3577b411b363SPhilipp Reisner 
	/* Dry-run connect: report what would happen, then refuse. */
357869a22773SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3579cf14c2e9SPhilipp Reisner 		if (hg == 0)
3580d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3581cf14c2e9SPhilipp Reisner 		else
3582d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3583cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3584cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3585cf14c2e9SPhilipp Reisner 		return C_MASK;
3586cf14c2e9SPhilipp Reisner 	}
3587cf14c2e9SPhilipp Reisner 
	/* |hg| >= 2: full sync required; mark the whole bitmap out of sync
	 * and persist it before starting. */
3588b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3589d0180171SAndreas Gruenbacher 		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3590b30ab791SAndreas Gruenbacher 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
359120ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3592b411b363SPhilipp Reisner 			return C_MASK;
3593b411b363SPhilipp Reisner 	}
3594b411b363SPhilipp Reisner 
	/* Map the verdict onto the next connection state. */
3595b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3596b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3597b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3598b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3599b411b363SPhilipp Reisner 	} else {
3600b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3601b30ab791SAndreas Gruenbacher 		if (drbd_bm_total_weight(device)) {
3602d0180171SAndreas Gruenbacher 			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3603b30ab791SAndreas Gruenbacher 			     drbd_bm_total_weight(device));
3604b411b363SPhilipp Reisner 		}
3605b411b363SPhilipp Reisner 	}
3606b411b363SPhilipp Reisner 
3607b411b363SPhilipp Reisner 	return rv;
3608b411b363SPhilipp Reisner }
3609b411b363SPhilipp Reisner 
3610f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3611b411b363SPhilipp Reisner {
3612b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3613f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3614f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3615b411b363SPhilipp Reisner 
3616b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3617f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3618f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3619b411b363SPhilipp Reisner 
3620b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3621f179d76dSPhilipp Reisner 	return peer;
3622b411b363SPhilipp Reisner }
3623b411b363SPhilipp Reisner 
3624bde89a9eSAndreas Gruenbacher static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
3625b411b363SPhilipp Reisner {
3626e658983aSAndreas Gruenbacher 	struct p_protocol *p = pi->data;
3627036b17eaSPhilipp Reisner 	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3628036b17eaSPhilipp Reisner 	int p_proto, p_discard_my_data, p_two_primaries, cf;
3629036b17eaSPhilipp Reisner 	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3630036b17eaSPhilipp Reisner 	char integrity_alg[SHARED_SECRET_MAX] = "";
36313d0e6375SKees Cook 	struct crypto_shash *peer_integrity_tfm = NULL;
36327aca6c75SPhilipp Reisner 	void *int_dig_in = NULL, *int_dig_vv = NULL;
3633b411b363SPhilipp Reisner 
	/* Decode the peer's protocol settings from network byte order. */
3634b411b363SPhilipp Reisner 	p_proto		= be32_to_cpu(p->protocol);
3635b411b363SPhilipp Reisner 	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
3636b411b363SPhilipp Reisner 	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
3637b411b363SPhilipp Reisner 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
3638b411b363SPhilipp Reisner 	p_two_primaries = be32_to_cpu(p->two_primaries);
3639cf14c2e9SPhilipp Reisner 	cf		= be32_to_cpu(p->conn_flags);
36406139f60dSAndreas Gruenbacher 	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
3641cf14c2e9SPhilipp Reisner 
	/* Since protocol 87 the packet carries the peer's data integrity
	 * algorithm name as variable-size payload; read and NUL-terminate it. */
3642bde89a9eSAndreas Gruenbacher 	if (connection->agreed_pro_version >= 87) {
364386db0618SAndreas Gruenbacher 		int err;
364486db0618SAndreas Gruenbacher 
364588104ca4SAndreas Gruenbacher 		if (pi->size > sizeof(integrity_alg))
364686db0618SAndreas Gruenbacher 			return -EIO;
3647bde89a9eSAndreas Gruenbacher 		err = drbd_recv_all(connection, integrity_alg, pi->size);
364886db0618SAndreas Gruenbacher 		if (err)
364986db0618SAndreas Gruenbacher 			return err;
365088104ca4SAndreas Gruenbacher 		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3651036b17eaSPhilipp Reisner 	}
365286db0618SAndreas Gruenbacher 
	/* For anything but a P_PROTOCOL_UPDATE, the peer's settings must be
	 * compatible with our current net_conf; any mismatch disconnects. */
36537d4c782cSAndreas Gruenbacher 	if (pi->cmd != P_PROTOCOL_UPDATE) {
3654bde89a9eSAndreas Gruenbacher 		clear_bit(CONN_DRY_RUN, &connection->flags);
3655cf14c2e9SPhilipp Reisner 
3656cf14c2e9SPhilipp Reisner 		if (cf & CF_DRY_RUN)
3657bde89a9eSAndreas Gruenbacher 			set_bit(CONN_DRY_RUN, &connection->flags);
3658b411b363SPhilipp Reisner 
365944ed167dSPhilipp Reisner 		rcu_read_lock();
3660bde89a9eSAndreas Gruenbacher 		nc = rcu_dereference(connection->net_conf);
366144ed167dSPhilipp Reisner 
3662036b17eaSPhilipp Reisner 		if (p_proto != nc->wire_protocol) {
36631ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "protocol");
366444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3665b411b363SPhilipp Reisner 		}
3666b411b363SPhilipp Reisner 
		/* after-sb policies are compared through convert_after_sb()
		 * because "discard local/remote" are mirrored between peers. */
3667f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
36681ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
366944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3670b411b363SPhilipp Reisner 		}
3671b411b363SPhilipp Reisner 
3672f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
36731ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
367444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3675b411b363SPhilipp Reisner 		}
3676b411b363SPhilipp Reisner 
3677f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
36781ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
367944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3680b411b363SPhilipp Reisner 		}
3681b411b363SPhilipp Reisner 
		/* discard-my-data set on both sides would resolve nothing. */
36826139f60dSAndreas Gruenbacher 		if (p_discard_my_data && nc->discard_my_data) {
36831ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
368444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3685b411b363SPhilipp Reisner 		}
3686b411b363SPhilipp Reisner 
368744ed167dSPhilipp Reisner 		if (p_two_primaries != nc->two_primaries) {
36881ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
368944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3690b411b363SPhilipp Reisner 		}
3691b411b363SPhilipp Reisner 
3692036b17eaSPhilipp Reisner 		if (strcmp(integrity_alg, nc->integrity_alg)) {
36931ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
3694036b17eaSPhilipp Reisner 			goto disconnect_rcu_unlock;
3695036b17eaSPhilipp Reisner 		}
3696036b17eaSPhilipp Reisner 
369786db0618SAndreas Gruenbacher 		rcu_read_unlock();
3698fbc12f45SAndreas Gruenbacher 	}
36997d4c782cSAndreas Gruenbacher 
	/* Allocate the hash transform and digest scratch buffers used to
	 * verify the peer's data integrity checksums. */
37007d4c782cSAndreas Gruenbacher 	if (integrity_alg[0]) {
37017d4c782cSAndreas Gruenbacher 		int hash_size;
37027d4c782cSAndreas Gruenbacher 
37037d4c782cSAndreas Gruenbacher 		/*
37047d4c782cSAndreas Gruenbacher 		 * We can only change the peer data integrity algorithm
37057d4c782cSAndreas Gruenbacher 		 * here.  Changing our own data integrity algorithm
37067d4c782cSAndreas Gruenbacher 		 * requires that we send a P_PROTOCOL_UPDATE packet at
37077d4c782cSAndreas Gruenbacher 		 * the same time; otherwise, the peer has no way to
37087d4c782cSAndreas Gruenbacher 		 * tell between which packets the algorithm should
37097d4c782cSAndreas Gruenbacher 		 * change.
37107d4c782cSAndreas Gruenbacher 		 */
37117d4c782cSAndreas Gruenbacher 
37123d234b33SEric Biggers 		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
37131b57e663SLars Ellenberg 		if (IS_ERR(peer_integrity_tfm)) {
37141b57e663SLars Ellenberg 			peer_integrity_tfm = NULL;
37151ec861ebSAndreas Gruenbacher 			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
37167d4c782cSAndreas Gruenbacher 				 integrity_alg);
3717b411b363SPhilipp Reisner 			goto disconnect;
3718b411b363SPhilipp Reisner 		}
3719b411b363SPhilipp Reisner 
37203d0e6375SKees Cook 		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
37217d4c782cSAndreas Gruenbacher 		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
37227d4c782cSAndreas Gruenbacher 		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
37237d4c782cSAndreas Gruenbacher 		if (!(int_dig_in && int_dig_vv)) {
37241ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
37257d4c782cSAndreas Gruenbacher 			goto disconnect;
37267d4c782cSAndreas Gruenbacher 		}
37277d4c782cSAndreas Gruenbacher 	}
37287d4c782cSAndreas Gruenbacher 
37297d4c782cSAndreas Gruenbacher 	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
37308404e191SZhen Lei 	if (!new_net_conf)
3731b411b363SPhilipp Reisner 		goto disconnect;
3732b411b363SPhilipp Reisner 
	/* Publish a copy of net_conf updated with the peer's values; both
	 * the data mutex and conf_update are held while swapping it in. */
3733bde89a9eSAndreas Gruenbacher 	mutex_lock(&connection->data.mutex);
37340500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
3735bde89a9eSAndreas Gruenbacher 	old_net_conf = connection->net_conf;
37367d4c782cSAndreas Gruenbacher 	*new_net_conf = *old_net_conf;
3737b411b363SPhilipp Reisner 
37387d4c782cSAndreas Gruenbacher 	new_net_conf->wire_protocol = p_proto;
37397d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
37407d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
37417d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
37427d4c782cSAndreas Gruenbacher 	new_net_conf->two_primaries = p_two_primaries;
3743b411b363SPhilipp Reisner 
3744bde89a9eSAndreas Gruenbacher 	rcu_assign_pointer(connection->net_conf, new_net_conf);
37450500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3746bde89a9eSAndreas Gruenbacher 	mutex_unlock(&connection->data.mutex);
3747b411b363SPhilipp Reisner 
	/* Swap in the new integrity transform and digest buffers, freeing
	 * the previous ones; ownership transfers to the connection here. */
37483d0e6375SKees Cook 	crypto_free_shash(connection->peer_integrity_tfm);
3749bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_in);
3750bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_vv);
3751bde89a9eSAndreas Gruenbacher 	connection->peer_integrity_tfm = peer_integrity_tfm;
3752bde89a9eSAndreas Gruenbacher 	connection->int_dig_in = int_dig_in;
3753bde89a9eSAndreas Gruenbacher 	connection->int_dig_vv = int_dig_vv;
3754b411b363SPhilipp Reisner 
37557d4c782cSAndreas Gruenbacher 	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
37561ec861ebSAndreas Gruenbacher 		drbd_info(connection, "peer data-integrity-alg: %s\n",
37577d4c782cSAndreas Gruenbacher 			  integrity_alg[0] ? integrity_alg : "(none)");
3758b411b363SPhilipp Reisner 
	/* RCU readers may still hold the old net_conf; free it only after a
	 * grace period. */
375990c6c291SUladzislau Rezki (Sony) 	kvfree_rcu(old_net_conf);
376082bc0194SAndreas Gruenbacher 	return 0;
3761b411b363SPhilipp Reisner 
	/* Error paths: release anything allocated above (kfree/crypto_free
	 * tolerate NULL) and tear the connection down. */
376244ed167dSPhilipp Reisner disconnect_rcu_unlock:
376344ed167dSPhilipp Reisner 	rcu_read_unlock();
3764b411b363SPhilipp Reisner disconnect:
37653d0e6375SKees Cook 	crypto_free_shash(peer_integrity_tfm);
3766036b17eaSPhilipp Reisner 	kfree(int_dig_in);
3767036b17eaSPhilipp Reisner 	kfree(int_dig_vv);
3768bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
376982bc0194SAndreas Gruenbacher 	return -EIO;
3770b411b363SPhilipp Reisner }
3771b411b363SPhilipp Reisner 
3772b411b363SPhilipp Reisner /* helper function
3773b411b363SPhilipp Reisner  * input: alg name, feature name
3774b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3775b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3776b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
37773d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
37783d0e6375SKees Cook 		const struct drbd_device *device,
3779b411b363SPhilipp Reisner 		const char *alg, const char *name)
3780b411b363SPhilipp Reisner {
37813d0e6375SKees Cook 	struct crypto_shash *tfm;
3782b411b363SPhilipp Reisner 
3783b411b363SPhilipp Reisner 	if (!alg[0])
3784b411b363SPhilipp Reisner 		return NULL;
3785b411b363SPhilipp Reisner 
37863d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3787b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3788d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3789b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3790b411b363SPhilipp Reisner 		return tfm;
3791b411b363SPhilipp Reisner 	}
3792b411b363SPhilipp Reisner 	return tfm;
3793b411b363SPhilipp Reisner }
3794b411b363SPhilipp Reisner 
3795bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3796b411b363SPhilipp Reisner {
3797bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
37984a76b161SAndreas Gruenbacher 	int size = pi->size;
37994a76b161SAndreas Gruenbacher 
38004a76b161SAndreas Gruenbacher 	while (size) {
38014a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3802bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38034a76b161SAndreas Gruenbacher 		if (s <= 0) {
38044a76b161SAndreas Gruenbacher 			if (s < 0)
38054a76b161SAndreas Gruenbacher 				return s;
38064a76b161SAndreas Gruenbacher 			break;
38074a76b161SAndreas Gruenbacher 		}
38084a76b161SAndreas Gruenbacher 		size -= s;
38094a76b161SAndreas Gruenbacher 	}
38104a76b161SAndreas Gruenbacher 	if (size)
38114a76b161SAndreas Gruenbacher 		return -EIO;
38124a76b161SAndreas Gruenbacher 	return 0;
38134a76b161SAndreas Gruenbacher }
38144a76b161SAndreas Gruenbacher 
38154a76b161SAndreas Gruenbacher /*
38164a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
38174a76b161SAndreas Gruenbacher  *
38184a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
38194a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
38204a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
38214a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
38224a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
38234a76b161SAndreas Gruenbacher  *
38244a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
38254a76b161SAndreas Gruenbacher  */
3826bde89a9eSAndreas Gruenbacher static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
38274a76b161SAndreas Gruenbacher {
38281ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
38292fcb8f30SAndreas Gruenbacher 		  cmdname(pi->cmd), pi->vnr);
3830bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
38314a76b161SAndreas Gruenbacher }
38324a76b161SAndreas Gruenbacher 
3833bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38344a76b161SAndreas Gruenbacher {
38359f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3836b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3837e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3838b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38393d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38403d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38412ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3842813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3843bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3844813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38456a365874SStephen Kitt 	unsigned int fifo_size = 0;
384682bc0194SAndreas Gruenbacher 	int err;
3847b411b363SPhilipp Reisner 
38489f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38499f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3850bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38519f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3852b411b363SPhilipp Reisner 
3853b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3854b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3855b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
38568e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
38578e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3858b411b363SPhilipp Reisner 
3859e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3860d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3861e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
386282bc0194SAndreas Gruenbacher 		return -EIO;
3863b411b363SPhilipp Reisner 	}
3864b411b363SPhilipp Reisner 
3865b411b363SPhilipp Reisner 	if (apv <= 88) {
3866e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3867e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38688e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3869e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3870e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38710b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
38728e26f9ccSPhilipp Reisner 	} else {
3873e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3874e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38750b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3876b411b363SPhilipp Reisner 	}
3877b411b363SPhilipp Reisner 
3878b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3879e658983aSAndreas Gruenbacher 	p = pi->data;
388052a0cab3SKees Cook 	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
388152a0cab3SKees Cook 	memset(&p->algs, 0, sizeof(p->algs));
3882b411b363SPhilipp Reisner 
38839f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
388482bc0194SAndreas Gruenbacher 	if (err)
388582bc0194SAndreas Gruenbacher 		return err;
3886b411b363SPhilipp Reisner 
38870500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
38889f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3889b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3890daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3891daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3892b30ab791SAndreas Gruenbacher 			put_ldev(device);
38930500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3894d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3895daeda1ccSPhilipp Reisner 			return -ENOMEM;
3896f399002eSLars Ellenberg 		}
3897b411b363SPhilipp Reisner 
3898b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3899daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3900daeda1ccSPhilipp Reisner 
39016394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3902813472ceSPhilipp Reisner 	}
3903b411b363SPhilipp Reisner 
3904b411b363SPhilipp Reisner 	if (apv >= 88) {
3905b411b363SPhilipp Reisner 		if (apv == 88) {
39065de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3907d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39085de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3909b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3910813472ceSPhilipp Reisner 				goto reconnect;
3911b411b363SPhilipp Reisner 			}
3912b411b363SPhilipp Reisner 
39139f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3914813472ceSPhilipp Reisner 			if (err)
3915813472ceSPhilipp Reisner 				goto reconnect;
3916b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3917b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39180b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3919b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3920b411b363SPhilipp Reisner 
3921b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3922b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3923b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39240b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39250b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3926b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3927b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3928b411b363SPhilipp Reisner 		}
3929b411b363SPhilipp Reisner 
39302ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3931b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3932d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39332ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3934b411b363SPhilipp Reisner 				goto disconnect;
3935b411b363SPhilipp Reisner 			}
3936b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3937b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3938b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3939b411b363SPhilipp Reisner 				verify_tfm = NULL;
3940b411b363SPhilipp Reisner 				goto disconnect;
3941b411b363SPhilipp Reisner 			}
3942b411b363SPhilipp Reisner 		}
3943b411b363SPhilipp Reisner 
39442ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3945b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3946d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39472ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3948b411b363SPhilipp Reisner 				goto disconnect;
3949b411b363SPhilipp Reisner 			}
3950b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3951b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3952b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3953b411b363SPhilipp Reisner 				csums_tfm = NULL;
3954b411b363SPhilipp Reisner 				goto disconnect;
3955b411b363SPhilipp Reisner 			}
3956b411b363SPhilipp Reisner 		}
3957b411b363SPhilipp Reisner 
3958813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3959daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3960daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3961daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3962daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3963778f271dSPhilipp Reisner 
3964daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3965b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3966813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3967813472ceSPhilipp Reisner 				if (!new_plan) {
3968d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3969b30ab791SAndreas Gruenbacher 					put_ldev(device);
3970778f271dSPhilipp Reisner 					goto disconnect;
3971778f271dSPhilipp Reisner 				}
3972778f271dSPhilipp Reisner 			}
39738e26f9ccSPhilipp Reisner 		}
3974b411b363SPhilipp Reisner 
397591fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
39762ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
39778404e191SZhen Lei 			if (!new_net_conf)
397891fd4dadSPhilipp Reisner 				goto disconnect;
397991fd4dadSPhilipp Reisner 
39802ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
398191fd4dadSPhilipp Reisner 
3982b411b363SPhilipp Reisner 			if (verify_tfm) {
39832ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
39842ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
39853d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
39869f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
3987d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3988b411b363SPhilipp Reisner 			}
3989b411b363SPhilipp Reisner 			if (csums_tfm) {
39902ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
39912ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
39923d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
39939f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
3994d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3995b411b363SPhilipp Reisner 			}
3996bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
3997778f271dSPhilipp Reisner 		}
3998b411b363SPhilipp Reisner 	}
3999b411b363SPhilipp Reisner 
4000813472ceSPhilipp Reisner 	if (new_disk_conf) {
4001b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4002b30ab791SAndreas Gruenbacher 		put_ldev(device);
4003b411b363SPhilipp Reisner 	}
4004813472ceSPhilipp Reisner 
4005813472ceSPhilipp Reisner 	if (new_plan) {
4006b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4007b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4008813472ceSPhilipp Reisner 	}
4009daeda1ccSPhilipp Reisner 
40100500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4011daeda1ccSPhilipp Reisner 	synchronize_rcu();
4012daeda1ccSPhilipp Reisner 	if (new_net_conf)
4013daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4014daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4015813472ceSPhilipp Reisner 	kfree(old_plan);
4016daeda1ccSPhilipp Reisner 
401782bc0194SAndreas Gruenbacher 	return 0;
4018b411b363SPhilipp Reisner 
4019813472ceSPhilipp Reisner reconnect:
4020813472ceSPhilipp Reisner 	if (new_disk_conf) {
4021b30ab791SAndreas Gruenbacher 		put_ldev(device);
4022813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4023813472ceSPhilipp Reisner 	}
40240500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4025813472ceSPhilipp Reisner 	return -EIO;
4026813472ceSPhilipp Reisner 
4027b411b363SPhilipp Reisner disconnect:
4028813472ceSPhilipp Reisner 	kfree(new_plan);
4029813472ceSPhilipp Reisner 	if (new_disk_conf) {
4030b30ab791SAndreas Gruenbacher 		put_ldev(device);
4031813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4032813472ceSPhilipp Reisner 	}
40330500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4034b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4035b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40363d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4037b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40383d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40399f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
404082bc0194SAndreas Gruenbacher 	return -EIO;
4041b411b363SPhilipp Reisner }
4042b411b363SPhilipp Reisner 
4043b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4044b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4045b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4046b411b363SPhilipp Reisner {
4047b411b363SPhilipp Reisner 	sector_t d;
4048b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4049b411b363SPhilipp Reisner 		return;
4050b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4051b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4052d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4053b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4054b411b363SPhilipp Reisner }
4055b411b363SPhilipp Reisner 
4056bde89a9eSAndreas Gruenbacher static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
4057b411b363SPhilipp Reisner {
40589f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4059b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4060e658983aSAndreas Gruenbacher 	struct p_sizes *p = pi->data;
40619104d31aSLars Ellenberg 	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
4062e96c9633SPhilipp Reisner 	enum determine_dev_size dd = DS_UNCHANGED;
40636a8d68b1SLars Ellenberg 	sector_t p_size, p_usize, p_csize, my_usize;
406494c43a13SLars Ellenberg 	sector_t new_size, cur_size;
4065b411b363SPhilipp Reisner 	int ldsc = 0; /* local disk size changed */
4066e89b591cSPhilipp Reisner 	enum dds_flags ddsf;
4067b411b363SPhilipp Reisner 
40689f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
40699f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4070bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
40719f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4072155bd9d1SChristoph Hellwig 	cur_size = get_capacity(device->vdisk);
40734a76b161SAndreas Gruenbacher 
4074b411b363SPhilipp Reisner 	p_size = be64_to_cpu(p->d_size);
4075b411b363SPhilipp Reisner 	p_usize = be64_to_cpu(p->u_size);
40766a8d68b1SLars Ellenberg 	p_csize = be64_to_cpu(p->c_size);
4077b411b363SPhilipp Reisner 
4078b411b363SPhilipp Reisner 	/* just store the peer's disk size for now.
4079b411b363SPhilipp Reisner 	 * we still need to figure out whether we accept that. */
4080b30ab791SAndreas Gruenbacher 	device->p_size = p_size;
4081b411b363SPhilipp Reisner 
4082b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4083daeda1ccSPhilipp Reisner 		rcu_read_lock();
4084b30ab791SAndreas Gruenbacher 		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
4085daeda1ccSPhilipp Reisner 		rcu_read_unlock();
4086daeda1ccSPhilipp Reisner 
4087b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "lower level device sizes",
4088b30ab791SAndreas Gruenbacher 			   p_size, drbd_get_max_capacity(device->ldev));
4089b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "user requested size",
4090daeda1ccSPhilipp Reisner 					    p_usize, my_usize);
4091b411b363SPhilipp Reisner 
4092b411b363SPhilipp Reisner 		/* if this is the first connect, or an otherwise expected
4093b411b363SPhilipp Reisner 		 * param exchange, choose the minimum */
4094b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_WF_REPORT_PARAMS)
4095daeda1ccSPhilipp Reisner 			p_usize = min_not_zero(my_usize, p_usize);
4096b411b363SPhilipp Reisner 
4097ad6e8979SLars Ellenberg 		/* Never shrink a device with usable data during connect,
4098ad6e8979SLars Ellenberg 		 * or "attach" on the peer.
4099ad6e8979SLars Ellenberg 		 * But allow online shrinking if we are connected. */
410060bac040SLars Ellenberg 		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
410160bac040SLars Ellenberg 		if (new_size < cur_size &&
4102b30ab791SAndreas Gruenbacher 		    device->state.disk >= D_OUTDATED &&
4103ad6e8979SLars Ellenberg 		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
410460bac040SLars Ellenberg 			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
410560bac040SLars Ellenberg 					(unsigned long long)new_size, (unsigned long long)cur_size);
41069f4fe9adSAndreas Gruenbacher 			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4107b30ab791SAndreas Gruenbacher 			put_ldev(device);
410882bc0194SAndreas Gruenbacher 			return -EIO;
4109b411b363SPhilipp Reisner 		}
4110daeda1ccSPhilipp Reisner 
4111daeda1ccSPhilipp Reisner 		if (my_usize != p_usize) {
4112daeda1ccSPhilipp Reisner 			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
4113daeda1ccSPhilipp Reisner 
4114daeda1ccSPhilipp Reisner 			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
4115daeda1ccSPhilipp Reisner 			if (!new_disk_conf) {
4116b30ab791SAndreas Gruenbacher 				put_ldev(device);
4117daeda1ccSPhilipp Reisner 				return -ENOMEM;
4118daeda1ccSPhilipp Reisner 			}
4119daeda1ccSPhilipp Reisner 
41200500813fSAndreas Gruenbacher 			mutex_lock(&connection->resource->conf_update);
4121b30ab791SAndreas Gruenbacher 			old_disk_conf = device->ldev->disk_conf;
4122daeda1ccSPhilipp Reisner 			*new_disk_conf = *old_disk_conf;
4123daeda1ccSPhilipp Reisner 			new_disk_conf->disk_size = p_usize;
4124daeda1ccSPhilipp Reisner 
4125b30ab791SAndreas Gruenbacher 			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
41260500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
412790c6c291SUladzislau Rezki (Sony) 			kvfree_rcu(old_disk_conf);
4128daeda1ccSPhilipp Reisner 
4129ad6e8979SLars Ellenberg 			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
4130ad6e8979SLars Ellenberg 				 (unsigned long)p_usize, (unsigned long)my_usize);
4131daeda1ccSPhilipp Reisner 		}
4132daeda1ccSPhilipp Reisner 
4133b30ab791SAndreas Gruenbacher 		put_ldev(device);
4134b411b363SPhilipp Reisner 	}
4135b411b363SPhilipp Reisner 
413620c68fdeSLars Ellenberg 	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
4137dd4f699dSLars Ellenberg 	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
413820c68fdeSLars Ellenberg 	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
4139dd4f699dSLars Ellenberg 	   drbd_reconsider_queue_parameters(), we can be sure that after
414020c68fdeSLars Ellenberg 	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
414120c68fdeSLars Ellenberg 
4142e89b591cSPhilipp Reisner 	ddsf = be16_to_cpu(p->dds_flags);
4143b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
41449104d31aSLars Ellenberg 		drbd_reconsider_queue_parameters(device, device->ldev, o);
4145b30ab791SAndreas Gruenbacher 		dd = drbd_determine_dev_size(device, ddsf, NULL);
4146b30ab791SAndreas Gruenbacher 		put_ldev(device);
4147e96c9633SPhilipp Reisner 		if (dd == DS_ERROR)
414882bc0194SAndreas Gruenbacher 			return -EIO;
4149b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
4150b411b363SPhilipp Reisner 	} else {
41516a8d68b1SLars Ellenberg 		/*
41526a8d68b1SLars Ellenberg 		 * I am diskless, need to accept the peer's *current* size.
41536a8d68b1SLars Ellenberg 		 * I must NOT accept the peers backing disk size,
41546a8d68b1SLars Ellenberg 		 * it may have been larger than mine all along...
41556a8d68b1SLars Ellenberg 		 *
41566a8d68b1SLars Ellenberg 		 * At this point, the peer knows more about my disk, or at
41576a8d68b1SLars Ellenberg 		 * least about what we last agreed upon, than myself.
41586a8d68b1SLars Ellenberg 		 * So if his c_size is less than his d_size, the most likely
41596a8d68b1SLars Ellenberg 		 * reason is that *my* d_size was smaller last time we checked.
41606a8d68b1SLars Ellenberg 		 *
41616a8d68b1SLars Ellenberg 		 * However, if he sends a zero current size,
41626a8d68b1SLars Ellenberg 		 * take his (user-capped or) backing disk size anyways.
416394c43a13SLars Ellenberg 		 *
416494c43a13SLars Ellenberg 		 * Unless of course he does not have a disk himself.
416594c43a13SLars Ellenberg 		 * In which case we ignore this completely.
41666a8d68b1SLars Ellenberg 		 */
416794c43a13SLars Ellenberg 		sector_t new_size = p_csize ?: p_usize ?: p_size;
41689104d31aSLars Ellenberg 		drbd_reconsider_queue_parameters(device, NULL, o);
416994c43a13SLars Ellenberg 		if (new_size == 0) {
417094c43a13SLars Ellenberg 			/* Ignore, peer does not know nothing. */
417194c43a13SLars Ellenberg 		} else if (new_size == cur_size) {
417294c43a13SLars Ellenberg 			/* nothing to do */
417394c43a13SLars Ellenberg 		} else if (cur_size != 0 && p_size == 0) {
417494c43a13SLars Ellenberg 			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
417594c43a13SLars Ellenberg 					(unsigned long long)new_size, (unsigned long long)cur_size);
417694c43a13SLars Ellenberg 		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
417794c43a13SLars Ellenberg 			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
417894c43a13SLars Ellenberg 					(unsigned long long)new_size, (unsigned long long)cur_size);
417994c43a13SLars Ellenberg 			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
418094c43a13SLars Ellenberg 			return -EIO;
418194c43a13SLars Ellenberg 		} else {
418294c43a13SLars Ellenberg 			/* I believe the peer, if
418394c43a13SLars Ellenberg 			 *  - I don't have a current size myself
418494c43a13SLars Ellenberg 			 *  - we agree on the size anyways
418594c43a13SLars Ellenberg 			 *  - I do have a current size, am Secondary,
418694c43a13SLars Ellenberg 			 *    and he has the only disk
418794c43a13SLars Ellenberg 			 *  - I do have a current size, am Primary,
418894c43a13SLars Ellenberg 			 *    and he has the only disk,
418994c43a13SLars Ellenberg 			 *    which is larger than my current size
419094c43a13SLars Ellenberg 			 */
419194c43a13SLars Ellenberg 			drbd_set_my_capacity(device, new_size);
419294c43a13SLars Ellenberg 		}
4193b411b363SPhilipp Reisner 	}
4194b411b363SPhilipp Reisner 
4195b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4196b30ab791SAndreas Gruenbacher 		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
4197b30ab791SAndreas Gruenbacher 			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
4198b411b363SPhilipp Reisner 			ldsc = 1;
4199b411b363SPhilipp Reisner 		}
4200b411b363SPhilipp Reisner 
4201b30ab791SAndreas Gruenbacher 		put_ldev(device);
4202b411b363SPhilipp Reisner 	}
4203b411b363SPhilipp Reisner 
4204b30ab791SAndreas Gruenbacher 	if (device->state.conn > C_WF_REPORT_PARAMS) {
4205155bd9d1SChristoph Hellwig 		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
4206155bd9d1SChristoph Hellwig 		    ldsc) {
4207b411b363SPhilipp Reisner 			/* we have different sizes, probably peer
4208b411b363SPhilipp Reisner 			 * needs to know my new size... */
420969a22773SAndreas Gruenbacher 			drbd_send_sizes(peer_device, 0, ddsf);
4210b411b363SPhilipp Reisner 		}
4211b30ab791SAndreas Gruenbacher 		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
4212b30ab791SAndreas Gruenbacher 		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
4213b30ab791SAndreas Gruenbacher 			if (device->state.pdsk >= D_INCONSISTENT &&
4214b30ab791SAndreas Gruenbacher 			    device->state.disk >= D_INCONSISTENT) {
4215e89b591cSPhilipp Reisner 				if (ddsf & DDSF_NO_RESYNC)
4216d0180171SAndreas Gruenbacher 					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
4217b411b363SPhilipp Reisner 				else
4218b30ab791SAndreas Gruenbacher 					resync_after_online_grow(device);
4219e89b591cSPhilipp Reisner 			} else
4220b30ab791SAndreas Gruenbacher 				set_bit(RESYNC_AFTER_NEG, &device->flags);
4221b411b363SPhilipp Reisner 		}
4222b411b363SPhilipp Reisner 	}
4223b411b363SPhilipp Reisner 
422482bc0194SAndreas Gruenbacher 	return 0;
4225b411b363SPhilipp Reisner }
4226b411b363SPhilipp Reisner 
4227bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4228b411b363SPhilipp Reisner {
42299f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4230b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4231e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4232b411b363SPhilipp Reisner 	u64 *p_uuid;
423362b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4234b411b363SPhilipp Reisner 
42359f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42369f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4237bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42389f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42394a76b161SAndreas Gruenbacher 
4240365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42418404e191SZhen Lei 	if (!p_uuid)
4242063eacf8SJing Wang 		return false;
4243b411b363SPhilipp Reisner 
4244b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4245b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4246b411b363SPhilipp Reisner 
4247b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4248b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4249b411b363SPhilipp Reisner 
4250b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4251b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4252b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4253b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4254d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4255b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
42569f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
425782bc0194SAndreas Gruenbacher 		return -EIO;
4258b411b363SPhilipp Reisner 	}
4259b411b363SPhilipp Reisner 
4260b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4261b411b363SPhilipp Reisner 		int skip_initial_sync =
4262b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
42639f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4264b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4265b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4266b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4267d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4268b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
426920ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
427020ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4271b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4272b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4273b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4274b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4275b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
427662b0da3aSLars Ellenberg 			updated_uuids = 1;
4277b411b363SPhilipp Reisner 		}
4278b30ab791SAndreas Gruenbacher 		put_ldev(device);
4279b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4280b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
428118a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
428218a50fa2SPhilipp Reisner 		   for me. */
4283b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4284b411b363SPhilipp Reisner 	}
4285b411b363SPhilipp Reisner 
4286b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4287b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4288b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4289b411b363SPhilipp Reisner 	   new disk state... */
4290b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4291b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4292b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4293b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
429462b0da3aSLars Ellenberg 
429562b0da3aSLars Ellenberg 	if (updated_uuids)
4296b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4297b411b363SPhilipp Reisner 
429882bc0194SAndreas Gruenbacher 	return 0;
4299b411b363SPhilipp Reisner }
4300b411b363SPhilipp Reisner 
4301b411b363SPhilipp Reisner /**
4302b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4303b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4304b411b363SPhilipp Reisner  */
4305b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4306b411b363SPhilipp Reisner {
4307b411b363SPhilipp Reisner 	union drbd_state ms;
4308b411b363SPhilipp Reisner 
4309b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4310369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4311b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4312b411b363SPhilipp Reisner 
4313b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4314b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4315b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4316b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4317b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4318b411b363SPhilipp Reisner 	};
4319b411b363SPhilipp Reisner 
4320b411b363SPhilipp Reisner 	ms.i = ps.i;
4321b411b363SPhilipp Reisner 
4322b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4323b411b363SPhilipp Reisner 	ms.peer = ps.role;
4324b411b363SPhilipp Reisner 	ms.role = ps.peer;
4325b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4326b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4327b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4328b411b363SPhilipp Reisner 
4329b411b363SPhilipp Reisner 	return ms;
4330b411b363SPhilipp Reisner }
4331b411b363SPhilipp Reisner 
/* Handle a P_STATE_CHG_REQ packet: the peer asks us to perform a state
 * change on a single volume.  The change is applied locally via
 * drbd_change_state() and the outcome is reported back to the peer with
 * drbd_send_sr_reply().
 *
 * Returns 0 on success (a refused state change still counts as success
 * here; the refusal travels back inside the reply packet), or -EIO if the
 * addressed volume is unknown.
 */
static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* mask selects which state fields the peer wants changed; val holds
	 * the new values.  Both arrive in network byte order and are phrased
	 * from the peer's point of view. */
	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* Both nodes may request a state change at the same time.  If we
	 * hold the tie-breaker (RESOLVE_CONFLICTS) and have a state change
	 * of our own in flight (state_mutex held), refuse the peer's
	 * request instead of risking conflicting cluster-wide changes. */
	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* translate from the peer's point of view into our own */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	/* persist the resulting state in our meta data */
	drbd_md_sync(device);

	return 0;
}
4364b411b363SPhilipp Reisner 
4365bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4366b411b363SPhilipp Reisner {
4367e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4368dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4369dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4370dfafcc8aSPhilipp Reisner 
4371dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4372dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4373dfafcc8aSPhilipp Reisner 
4374bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4375bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4376bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
437782bc0194SAndreas Gruenbacher 		return 0;
4378dfafcc8aSPhilipp Reisner 	}
4379dfafcc8aSPhilipp Reisner 
4380dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4381dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4382dfafcc8aSPhilipp Reisner 
4383bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4384bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4385dfafcc8aSPhilipp Reisner 
438682bc0194SAndreas Gruenbacher 	return 0;
4387dfafcc8aSPhilipp Reisner }
4388dfafcc8aSPhilipp Reisner 
/* Handle a P_STATE packet: the peer reports its current state; reconcile
 * it with our own view and apply the combined result.
 *
 * The reconciliation runs optimistically: the state is sampled under
 * req_lock, examined and (possibly) negotiated without the lock, and then
 * re-checked under the lock before committing - retrying from 'retry:' if
 * our state changed in between.
 *
 * Returns 0 on success, -ECONNRESET if the connection is already being
 * torn down, -EIO on a fatal disagreement (which also triggers a
 * disconnect), or the result of config_unknown_volume() for an unknown
 * volume number.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still negotiating its disk attach, derive its
	 * effective disk state from the UUID flags it sent earlier. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	/* take a consistent snapshot of our own state; os stays as the
	 * reference for the commit-time re-check below */
	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer throttled itself Ahead of us; mirror that as Behind */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids()).
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from the handshake means: no suitable common
		 * ancestor, we could not agree on a sync direction */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				/* a dry-run handshake failure is expected to
				 * end the connection without further noise */
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* commit phase: re-check that our state did not change while we
	 * were deciding without the lock; if it did, start over */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4585b411b363SPhilipp Reisner 
/* Handle a P_SYNC_UUID packet: the peer (about to become SyncSource) sends
 * the UUID it generated for the upcoming resync.  Adopt it as our current
 * UUID and start resync as SyncTarget.
 *
 * Returns 0 (a missing local disk is only logged), or -EIO for an unknown
 * volume number.
 */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* wait until we reach a state in which the sync uuid may be applied,
	 * or until it becomes clear that it no longer can be (disconnect,
	 * detach) */
	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}
4620b411b363SPhilipp Reisner 
/*
 * receive_bitmap_plain
 *
 * Receive one chunk of an uncompressed (P_BITMAP) bitmap transfer and merge
 * it into the local bitmap at the transfer offset tracked in @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	/* maximum payload that fits into a single packet */
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	/* bitmap words still outstanding, capped to one packet's worth */
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	/* the sender must transmit exactly what we expect at this offset */
	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	/* merge (OR) the received little-endian words into our bitmap */
	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	/* advance the transfer context; clamp the bit offset at the end */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
4657b411b363SPhilipp Reisner 
4658a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4659a02d1240SAndreas Gruenbacher {
4660a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4661a02d1240SAndreas Gruenbacher }
4662a02d1240SAndreas Gruenbacher 
4663a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4664a02d1240SAndreas Gruenbacher {
4665a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4666a02d1240SAndreas Gruenbacher }
4667a02d1240SAndreas Gruenbacher 
4668a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4669a02d1240SAndreas Gruenbacher {
4670a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4671a02d1240SAndreas Gruenbacher }
4672a02d1240SAndreas Gruenbacher 
/*
 * recv_bm_rle_bits
 *
 * Decode one P_COMPRESSED_BITMAP payload: a bit stream of VLI (variable
 * length integer) encoded run lengths of alternating runs of clear and set
 * bits.  Runs of set bits are applied to the local bitmap; runs of clear
 * bits are only skipped over.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;			/* window of up to 64 buffered, not yet decoded bits */
	u64 rl;				/* decoded run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* current position in the bitmap */
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* does the first run describe set bits? */
	int have;			/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* a run must never extend past the end of the bitmap */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoded code word must fit within the buffered bits */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bit stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done once the decoded runs cover the whole bitmap */
	return (s != c->bm_bits);
}
4741b411b363SPhilipp Reisner 
47429b48ff07SLee Jones /*
47432c46407dSAndreas Gruenbacher  * decode_bitmap_c
47442c46407dSAndreas Gruenbacher  *
47452c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47462c46407dSAndreas Gruenbacher  * code upon failure.
47472c46407dSAndreas Gruenbacher  */
47482c46407dSAndreas Gruenbacher static int
474969a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4750b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4751c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4752c6d25cfeSPhilipp Reisner 		unsigned int len)
4753b411b363SPhilipp Reisner {
4754a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
475569a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4756b411b363SPhilipp Reisner 
4757b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4758b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4759b411b363SPhilipp Reisner 	 * during all our tests. */
4760b411b363SPhilipp Reisner 
476169a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
476269a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
47632c46407dSAndreas Gruenbacher 	return -EIO;
4764b411b363SPhilipp Reisner }
4765b411b363SPhilipp Reisner 
4766b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4767b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4768b411b363SPhilipp Reisner {
4769b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4770a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
477150d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
477250d0b1adSAndreas Gruenbacher 	unsigned int plain =
477350d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
477450d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
477550d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
477650d0b1adSAndreas Gruenbacher 	unsigned int r;
4777b411b363SPhilipp Reisner 
4778b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4779b411b363SPhilipp Reisner 	if (total == 0)
4780b411b363SPhilipp Reisner 		return;
4781b411b363SPhilipp Reisner 
4782b411b363SPhilipp Reisner 	/* don't report if not compressed */
4783b411b363SPhilipp Reisner 	if (total >= plain)
4784b411b363SPhilipp Reisner 		return;
4785b411b363SPhilipp Reisner 
4786b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4787b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4788b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4789b411b363SPhilipp Reisner 
4790b411b363SPhilipp Reisner 	if (r > 1000)
4791b411b363SPhilipp Reisner 		r = 1000;
4792b411b363SPhilipp Reisner 
4793b411b363SPhilipp Reisner 	r = 1000 - r;
4794d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4795b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4796b411b363SPhilipp Reisner 			direction,
4797b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4798b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4799b411b363SPhilipp Reisner 			total, r/10, r % 10);
4800b411b363SPhilipp Reisner }
4801b411b363SPhilipp Reisner 
4802b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4803b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4804b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4805b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4806b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4807b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4808b411b363SPhilipp Reisner 
4809b411b363SPhilipp Reisner    returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;	/* transfer progress cursor and statistics */
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* Keep consuming bitmap packets (plain or compressed) until the decode
	 * helpers signal completion.  Their return convention: > 0 means "more
	 * packets expected", 0 means "bitmap complete", < 0 means error. */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* transfer statistics: index 1 counts plain, index 0 compressed */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* As bitmap target, answer with our own bitmap before
		 * advancing the connection state. */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	/* As bitmap source, start the resync now that the exchange succeeded. */
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4897b411b363SPhilipp Reisner 
4898bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4899b411b363SPhilipp Reisner {
49001ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4901e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4902b411b363SPhilipp Reisner 
4903bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4904b411b363SPhilipp Reisner }
4905b411b363SPhilipp Reisner 
4906bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4907b411b363SPhilipp Reisner {
4908b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4909b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4910ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
491182bc0194SAndreas Gruenbacher 	return 0;
4912b411b363SPhilipp Reisner }
4913b411b363SPhilipp Reisner 
4914bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
491573a01a18SPhilipp Reisner {
49169f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4917b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4918e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49194a76b161SAndreas Gruenbacher 
49209f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49219f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49224a76b161SAndreas Gruenbacher 		return -EIO;
49239f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
492473a01a18SPhilipp Reisner 
4925b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4926f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4927f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4928f735e363SLars Ellenberg 	case C_BEHIND:
4929f735e363SLars Ellenberg 			break;
4930f735e363SLars Ellenberg 	default:
4931d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4932b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4933f735e363SLars Ellenberg 	}
4934f735e363SLars Ellenberg 
4935b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
493673a01a18SPhilipp Reisner 
493782bc0194SAndreas Gruenbacher 	return 0;
493873a01a18SPhilipp Reisner }
493973a01a18SPhilipp Reisner 
/* Handle P_RS_DEALLOCATED: during resync the peer reports that a block is
 * deallocated on its side.  Instead of transferring data, punch a matching
 * hole locally by submitting a discard request; on failure (or without a
 * local disk) answer with P_NEG_ACK. */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;

		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->opf = REQ_OP_DISCARD;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		/* NOTE(review): rs_sect_ev is charged with pi->size while
		 * rs_sect_in below uses the decoded size; presumably these are
		 * meant to agree — verify against the sender side. */
		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(peer_req);

		if (err) {
			/* submission failed: undo the queueing and references,
			 * then fall back to the no-disk path below */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;	/* handled locally; not a connection error */
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		/* No local disk (or local submission failed): tell the peer
		 * we could not apply the deallocation. */
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
5005700ca8c0SPhilipp Reisner 
/* One entry per data-socket packet type; see drbd_cmd_handler[] below. */
struct data_cmd {
	int expect_payload;	/* packet may carry payload beyond pkt_size */
	unsigned int pkt_size;	/* size of the fixed sub-header to read first */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
5011b411b363SPhilipp Reisner 
/* Dispatch table for the data socket, indexed by packet type (pi->cmd).
 * Entries with a NULL ->fn (gaps in the enum) are rejected in drbdd(). */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
};
504202918be2SPhilipp Reisner 
5043bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5044b411b363SPhilipp Reisner {
504577351055SPhilipp Reisner 	struct packet_info pi;
504602918be2SPhilipp Reisner 	size_t shs; /* sub header size */
504782bc0194SAndreas Gruenbacher 	int err;
5048b411b363SPhilipp Reisner 
5049bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
50509104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5051deebe195SAndreas Gruenbacher 
5052bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5053c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5054c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
505502918be2SPhilipp Reisner 			goto err_out;
505602918be2SPhilipp Reisner 
5057deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
50584a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
50591ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
50602fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
506102918be2SPhilipp Reisner 			goto err_out;
50620b33a916SLars Ellenberg 		}
5063b411b363SPhilipp Reisner 
5064e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
50659104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
50669104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5067e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
50681ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
50692fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5070c13f7e1aSLars Ellenberg 			goto err_out;
5071c13f7e1aSLars Ellenberg 		}
50729104d31aSLars Ellenberg 		if (pi.size < shs) {
50739104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
50749104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
50759104d31aSLars Ellenberg 			goto err_out;
50769104d31aSLars Ellenberg 		}
5077c13f7e1aSLars Ellenberg 
5078c13f7e1aSLars Ellenberg 		if (shs) {
5079944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5080bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5081a5c31904SAndreas Gruenbacher 			if (err)
508202918be2SPhilipp Reisner 				goto err_out;
5083e2857216SAndreas Gruenbacher 			pi.size -= shs;
5084b411b363SPhilipp Reisner 		}
508502918be2SPhilipp Reisner 
5086944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5087bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
50884a76b161SAndreas Gruenbacher 		if (err) {
50891ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
50909f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
509102918be2SPhilipp Reisner 			goto err_out;
509202918be2SPhilipp Reisner 		}
509302918be2SPhilipp Reisner 	}
509482bc0194SAndreas Gruenbacher 	return;
509502918be2SPhilipp Reisner 
509602918be2SPhilipp Reisner     err_out:
5097bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5098b411b363SPhilipp Reisner }
5099b411b363SPhilipp Reisner 
/* Tear down a lost or closing connection: stop the ack receiver/sender,
 * close the sockets, run per-volume cleanup, and move the connection state
 * towards C_UNCONNECTED (or C_STANDALONE when disconnecting for good). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* Per-volume cleanup.  drbd_disconnected() may block, so drop the RCU
	 * read lock around the call while pinning the device via its kref. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we are (still) primary and the peer's disk state is unknown,
	 * try to outdate the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5156360cc740SPhilipp Reisner 
/* Per-volume cleanup after connection loss: wait for in-flight peer
 * requests, cancel resync bookkeeping, flush the sender work queue, drop
 * the peer's UUIDs, clear the transfer log, and write out meta data and
 * bitmap.  Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* make sure no resync timer fires later; run its handler once
	 * synchronously instead */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* write the (possibly changed) bitmap out, if we have a local disk */
	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
5242b411b363SPhilipp Reisner 
5243b411b363SPhilipp Reisner /*
5244b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5245b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5246b411b363SPhilipp Reisner  *
5247b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5248b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5249b411b363SPhilipp Reisner  *
5250b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5251b411b363SPhilipp Reisner  */
5252bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5253b411b363SPhilipp Reisner {
52549f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
52559f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5256b411b363SPhilipp Reisner 
5257bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5258bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
52599f5bdc33SAndreas Gruenbacher 	if (!p)
5260e8d17b01SAndreas Gruenbacher 		return -EIO;
5261b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5262b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5263b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
526420c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5265bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5266b411b363SPhilipp Reisner }
5267b411b363SPhilipp Reisner 
5268b411b363SPhilipp Reisner /*
5269b411b363SPhilipp Reisner  * return values:
5270b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5271b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5272b411b363SPhilipp Reisner  *  -1 peer talks different language,
5273b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5274b411b363SPhilipp Reisner  */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* send/recv failures mean "try again later" (return 0); protocol
	 * violations mean "give up" (return -1) -- see the comment above */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* convert in place from network byte order */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;	/* peer reported no max */

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest common protocol version and the intersection
	 * of the advertised feature flags */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5340b411b363SPhilipp Reisner 
5341b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: challenge-response
 * authentication is impossible, so refuse permanently.
 * Returns -1 ("auth failed, don't try again"; see comment below). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* fix grammar in the original message: "was build" -> "was built" */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5348b411b363SPhilipp Reisner #else
5349b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5350b10d96cbSJohannes Thoma 
5351b10d96cbSJohannes Thoma /* Return value:
5352b10d96cbSJohannes Thoma 	1 - auth succeeded,
5353b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5354b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5355b10d96cbSJohannes Thoma */
5356b10d96cbSJohannes Thoma 
5357bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection)
5358b411b363SPhilipp Reisner {
53599f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
5360b411b363SPhilipp Reisner 	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
5361b411b363SPhilipp Reisner 	char *response = NULL;
5362b411b363SPhilipp Reisner 	char *right_response = NULL;
5363b411b363SPhilipp Reisner 	char *peers_ch = NULL;
536444ed167dSPhilipp Reisner 	unsigned int key_len;
536544ed167dSPhilipp Reisner 	char secret[SHARED_SECRET_MAX]; /* 64 byte */
5366b411b363SPhilipp Reisner 	unsigned int resp_size;
536777ce56e2SArnd Bergmann 	struct shash_desc *desc;
536877351055SPhilipp Reisner 	struct packet_info pi;
536944ed167dSPhilipp Reisner 	struct net_conf *nc;
537069bc7bc3SAndreas Gruenbacher 	int err, rv;
5371b411b363SPhilipp Reisner 
53729f5bdc33SAndreas Gruenbacher 	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */
53739f5bdc33SAndreas Gruenbacher 
537444ed167dSPhilipp Reisner 	rcu_read_lock();
5375bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
537644ed167dSPhilipp Reisner 	key_len = strlen(nc->shared_secret);
537744ed167dSPhilipp Reisner 	memcpy(secret, nc->shared_secret, key_len);
537844ed167dSPhilipp Reisner 	rcu_read_unlock();
537944ed167dSPhilipp Reisner 
538077ce56e2SArnd Bergmann 	desc = kmalloc(sizeof(struct shash_desc) +
538177ce56e2SArnd Bergmann 		       crypto_shash_descsize(connection->cram_hmac_tfm),
538277ce56e2SArnd Bergmann 		       GFP_KERNEL);
538377ce56e2SArnd Bergmann 	if (!desc) {
538477ce56e2SArnd Bergmann 		rv = -1;
538577ce56e2SArnd Bergmann 		goto fail;
538677ce56e2SArnd Bergmann 	}
53879534d671SHerbert Xu 	desc->tfm = connection->cram_hmac_tfm;
5388b411b363SPhilipp Reisner 
53899534d671SHerbert Xu 	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
5390b411b363SPhilipp Reisner 	if (rv) {
53919534d671SHerbert Xu 		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
5392b10d96cbSJohannes Thoma 		rv = -1;
5393b411b363SPhilipp Reisner 		goto fail;
5394b411b363SPhilipp Reisner 	}
5395b411b363SPhilipp Reisner 
5396b411b363SPhilipp Reisner 	get_random_bytes(my_challenge, CHALLENGE_LEN);
5397b411b363SPhilipp Reisner 
5398bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5399bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
54009f5bdc33SAndreas Gruenbacher 		rv = 0;
54019f5bdc33SAndreas Gruenbacher 		goto fail;
54029f5bdc33SAndreas Gruenbacher 	}
5403bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
54049f5bdc33SAndreas Gruenbacher 				my_challenge, CHALLENGE_LEN);
5405b411b363SPhilipp Reisner 	if (!rv)
5406b411b363SPhilipp Reisner 		goto fail;
5407b411b363SPhilipp Reisner 
5408bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
540969bc7bc3SAndreas Gruenbacher 	if (err) {
5410b411b363SPhilipp Reisner 		rv = 0;
5411b411b363SPhilipp Reisner 		goto fail;
5412b411b363SPhilipp Reisner 	}
5413b411b363SPhilipp Reisner 
541477351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_CHALLENGE) {
54151ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
541677351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
54179049ccd4SLars Ellenberg 		rv = -1;
5418b411b363SPhilipp Reisner 		goto fail;
5419b411b363SPhilipp Reisner 	}
5420b411b363SPhilipp Reisner 
542177351055SPhilipp Reisner 	if (pi.size > CHALLENGE_LEN * 2) {
54221ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge payload too big.\n");
5423b10d96cbSJohannes Thoma 		rv = -1;
5424b411b363SPhilipp Reisner 		goto fail;
5425b411b363SPhilipp Reisner 	}
5426b411b363SPhilipp Reisner 
542767cca286SPhilipp Reisner 	if (pi.size < CHALLENGE_LEN) {
542867cca286SPhilipp Reisner 		drbd_err(connection, "AuthChallenge payload too small.\n");
542967cca286SPhilipp Reisner 		rv = -1;
543067cca286SPhilipp Reisner 		goto fail;
543167cca286SPhilipp Reisner 	}
543267cca286SPhilipp Reisner 
543377351055SPhilipp Reisner 	peers_ch = kmalloc(pi.size, GFP_NOIO);
54348404e191SZhen Lei 	if (!peers_ch) {
5435b10d96cbSJohannes Thoma 		rv = -1;
5436b411b363SPhilipp Reisner 		goto fail;
5437b411b363SPhilipp Reisner 	}
5438b411b363SPhilipp Reisner 
5439bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
5440a5c31904SAndreas Gruenbacher 	if (err) {
5441b411b363SPhilipp Reisner 		rv = 0;
5442b411b363SPhilipp Reisner 		goto fail;
5443b411b363SPhilipp Reisner 	}
5444b411b363SPhilipp Reisner 
544567cca286SPhilipp Reisner 	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
544667cca286SPhilipp Reisner 		drbd_err(connection, "Peer presented the same challenge!\n");
544767cca286SPhilipp Reisner 		rv = -1;
544867cca286SPhilipp Reisner 		goto fail;
544967cca286SPhilipp Reisner 	}
545067cca286SPhilipp Reisner 
54519534d671SHerbert Xu 	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
5452b411b363SPhilipp Reisner 	response = kmalloc(resp_size, GFP_NOIO);
54538404e191SZhen Lei 	if (!response) {
5454b10d96cbSJohannes Thoma 		rv = -1;
5455b411b363SPhilipp Reisner 		goto fail;
5456b411b363SPhilipp Reisner 	}
5457b411b363SPhilipp Reisner 
54589534d671SHerbert Xu 	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
5459b411b363SPhilipp Reisner 	if (rv) {
54601ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5461b10d96cbSJohannes Thoma 		rv = -1;
5462b411b363SPhilipp Reisner 		goto fail;
5463b411b363SPhilipp Reisner 	}
5464b411b363SPhilipp Reisner 
5465bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
54669f5bdc33SAndreas Gruenbacher 		rv = 0;
54679f5bdc33SAndreas Gruenbacher 		goto fail;
54689f5bdc33SAndreas Gruenbacher 	}
5469bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
54709f5bdc33SAndreas Gruenbacher 				response, resp_size);
5471b411b363SPhilipp Reisner 	if (!rv)
5472b411b363SPhilipp Reisner 		goto fail;
5473b411b363SPhilipp Reisner 
5474bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
547569bc7bc3SAndreas Gruenbacher 	if (err) {
5476b411b363SPhilipp Reisner 		rv = 0;
5477b411b363SPhilipp Reisner 		goto fail;
5478b411b363SPhilipp Reisner 	}
5479b411b363SPhilipp Reisner 
548077351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_RESPONSE) {
54811ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
548277351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
5483b411b363SPhilipp Reisner 		rv = 0;
5484b411b363SPhilipp Reisner 		goto fail;
5485b411b363SPhilipp Reisner 	}
5486b411b363SPhilipp Reisner 
548777351055SPhilipp Reisner 	if (pi.size != resp_size) {
54881ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
5489b411b363SPhilipp Reisner 		rv = 0;
5490b411b363SPhilipp Reisner 		goto fail;
5491b411b363SPhilipp Reisner 	}
5492b411b363SPhilipp Reisner 
5493bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, response , resp_size);
5494a5c31904SAndreas Gruenbacher 	if (err) {
5495b411b363SPhilipp Reisner 		rv = 0;
5496b411b363SPhilipp Reisner 		goto fail;
5497b411b363SPhilipp Reisner 	}
5498b411b363SPhilipp Reisner 
5499b411b363SPhilipp Reisner 	right_response = kmalloc(resp_size, GFP_NOIO);
55008404e191SZhen Lei 	if (!right_response) {
5501b10d96cbSJohannes Thoma 		rv = -1;
5502b411b363SPhilipp Reisner 		goto fail;
5503b411b363SPhilipp Reisner 	}
5504b411b363SPhilipp Reisner 
55059534d671SHerbert Xu 	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
55069534d671SHerbert Xu 				 right_response);
5507b411b363SPhilipp Reisner 	if (rv) {
55081ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5509b10d96cbSJohannes Thoma 		rv = -1;
5510b411b363SPhilipp Reisner 		goto fail;
5511b411b363SPhilipp Reisner 	}
5512b411b363SPhilipp Reisner 
5513b411b363SPhilipp Reisner 	rv = !memcmp(response, right_response, resp_size);
5514b411b363SPhilipp Reisner 
5515b411b363SPhilipp Reisner 	if (rv)
55161ec861ebSAndreas Gruenbacher 		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
551744ed167dSPhilipp Reisner 		     resp_size);
5518b10d96cbSJohannes Thoma 	else
5519b10d96cbSJohannes Thoma 		rv = -1;
5520b411b363SPhilipp Reisner 
5521b411b363SPhilipp Reisner  fail:
5522b411b363SPhilipp Reisner 	kfree(peers_ch);
5523b411b363SPhilipp Reisner 	kfree(response);
5524b411b363SPhilipp Reisner 	kfree(right_response);
552577ce56e2SArnd Bergmann 	if (desc) {
55269534d671SHerbert Xu 		shash_desc_zero(desc);
552777ce56e2SArnd Bergmann 		kfree(desc);
552877ce56e2SArnd Bergmann 	}
5529b411b363SPhilipp Reisner 
5530b411b363SPhilipp Reisner 	return rv;
5531b411b363SPhilipp Reisner }
5532b411b363SPhilipp Reisner #endif
5533b411b363SPhilipp Reisner 
55348fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5535b411b363SPhilipp Reisner {
5536bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5537b411b363SPhilipp Reisner 	int h;
5538b411b363SPhilipp Reisner 
55391ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5540b411b363SPhilipp Reisner 
5541b411b363SPhilipp Reisner 	do {
5542bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5543b411b363SPhilipp Reisner 		if (h == 0) {
5544bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
554520ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5546b411b363SPhilipp Reisner 		}
5547b411b363SPhilipp Reisner 		if (h == -1) {
55481ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5549bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5550b411b363SPhilipp Reisner 		}
5551b411b363SPhilipp Reisner 	} while (h == 0);
5552b411b363SPhilipp Reisner 
5553c51a0ef3SLars Ellenberg 	if (h > 0) {
5554c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5555bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5556c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5557c51a0ef3SLars Ellenberg 	}
5558b411b363SPhilipp Reisner 
5559bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5560b411b363SPhilipp Reisner 
55611ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5562b411b363SPhilipp Reisner 	return 0;
5563b411b363SPhilipp Reisner }
5564b411b363SPhilipp Reisner 
5565b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5566b411b363SPhilipp Reisner 
5567bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5568b411b363SPhilipp Reisner {
5569e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5570b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5571b411b363SPhilipp Reisner 
5572b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5573bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5574b411b363SPhilipp Reisner 	} else {
5575bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
55761ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5577fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5578fc3b10a4SPhilipp Reisner 	}
5579bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5580e4f78edeSPhilipp Reisner 
55812735a594SAndreas Gruenbacher 	return 0;
5582fc3b10a4SPhilipp Reisner }
5583e4f78edeSPhilipp Reisner 
5584bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5585e4f78edeSPhilipp Reisner {
55869f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5587b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5588e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5589e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5590e4f78edeSPhilipp Reisner 
55919f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
55929f4fe9adSAndreas Gruenbacher 	if (!peer_device)
55932735a594SAndreas Gruenbacher 		return -EIO;
55949f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
55951952e916SAndreas Gruenbacher 
5596bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
55970b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5598bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
55994d0fc3fdSPhilipp Reisner 	}
56004d0fc3fdSPhilipp Reisner 
5601e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5602b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5603e4f78edeSPhilipp Reisner 	} else {
5604b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5605d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5606b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5607b411b363SPhilipp Reisner 	}
5608b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5609b411b363SPhilipp Reisner 
56102735a594SAndreas Gruenbacher 	return 0;
5611b411b363SPhilipp Reisner }
5612b411b363SPhilipp Reisner 
5613bde89a9eSAndreas Gruenbacher static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
5614b411b363SPhilipp Reisner {
5615bde89a9eSAndreas Gruenbacher 	return drbd_send_ping_ack(connection);
5616b411b363SPhilipp Reisner 
5617b411b363SPhilipp Reisner }
5618b411b363SPhilipp Reisner 
5619bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5620b411b363SPhilipp Reisner {
5621b411b363SPhilipp Reisner 	/* restore idle timeout */
5622bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5623bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5624bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5625b411b363SPhilipp Reisner 
56262735a594SAndreas Gruenbacher 	return 0;
5627b411b363SPhilipp Reisner }
5628b411b363SPhilipp Reisner 
5629bde89a9eSAndreas Gruenbacher static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
5630b411b363SPhilipp Reisner {
56319f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5632b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5633e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5634b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5635b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5636b411b363SPhilipp Reisner 
56379f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56389f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56392735a594SAndreas Gruenbacher 		return -EIO;
56409f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56411952e916SAndreas Gruenbacher 
56429f4fe9adSAndreas Gruenbacher 	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
5643b411b363SPhilipp Reisner 
564469a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5645b411b363SPhilipp Reisner 
5646b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
5647b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5648b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5649b411b363SPhilipp Reisner 		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
5650b30ab791SAndreas Gruenbacher 		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5651b30ab791SAndreas Gruenbacher 		put_ldev(device);
56521d53f09eSLars Ellenberg 	}
5653b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5654b30ab791SAndreas Gruenbacher 	atomic_add(blksize >> 9, &device->rs_sect_in);
5655b411b363SPhilipp Reisner 
56562735a594SAndreas Gruenbacher 	return 0;
5657b411b363SPhilipp Reisner }
5658b411b363SPhilipp Reisner 
5659bc9c5c41SAndreas Gruenbacher static int
5660b30ab791SAndreas Gruenbacher validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
5661bc9c5c41SAndreas Gruenbacher 			      struct rb_root *root, const char *func,
5662bc9c5c41SAndreas Gruenbacher 			      enum drbd_req_event what, bool missing_ok)
5663b411b363SPhilipp Reisner {
5664b411b363SPhilipp Reisner 	struct drbd_request *req;
5665b411b363SPhilipp Reisner 	struct bio_and_error m;
5666b411b363SPhilipp Reisner 
56670500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5668b30ab791SAndreas Gruenbacher 	req = find_request(device, root, id, sector, missing_ok, func);
5669b411b363SPhilipp Reisner 	if (unlikely(!req)) {
56700500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
567185997675SAndreas Gruenbacher 		return -EIO;
5672b411b363SPhilipp Reisner 	}
5673b411b363SPhilipp Reisner 	__req_mod(req, what, &m);
56740500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5675b411b363SPhilipp Reisner 
5676b411b363SPhilipp Reisner 	if (m.bio)
5677b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
567885997675SAndreas Gruenbacher 	return 0;
5679b411b363SPhilipp Reisner }
5680b411b363SPhilipp Reisner 
5681bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
5682b411b363SPhilipp Reisner {
56839f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5684b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5685e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5686b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5687b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5688b411b363SPhilipp Reisner 	enum drbd_req_event what;
5689b411b363SPhilipp Reisner 
56909f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56919f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56922735a594SAndreas Gruenbacher 		return -EIO;
56939f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56941952e916SAndreas Gruenbacher 
569569a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5696b411b363SPhilipp Reisner 
5697579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5698b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5699b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
57002735a594SAndreas Gruenbacher 		return 0;
5701b411b363SPhilipp Reisner 	}
5702e05e1e59SAndreas Gruenbacher 	switch (pi->cmd) {
5703b411b363SPhilipp Reisner 	case P_RS_WRITE_ACK:
57048554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER_AND_SIS;
5705b411b363SPhilipp Reisner 		break;
5706b411b363SPhilipp Reisner 	case P_WRITE_ACK:
57078554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER;
5708b411b363SPhilipp Reisner 		break;
5709b411b363SPhilipp Reisner 	case P_RECV_ACK:
57108554df1cSAndreas Gruenbacher 		what = RECV_ACKED_BY_PEER;
5711b411b363SPhilipp Reisner 		break;
5712d4dabbe2SLars Ellenberg 	case P_SUPERSEDED:
5713d4dabbe2SLars Ellenberg 		what = CONFLICT_RESOLVED;
57147be8da07SAndreas Gruenbacher 		break;
57157be8da07SAndreas Gruenbacher 	case P_RETRY_WRITE:
57167be8da07SAndreas Gruenbacher 		what = POSTPONE_WRITE;
5717b411b363SPhilipp Reisner 		break;
5718b411b363SPhilipp Reisner 	default:
57192735a594SAndreas Gruenbacher 		BUG();
5720b411b363SPhilipp Reisner 	}
5721b411b363SPhilipp Reisner 
5722b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5723b30ab791SAndreas Gruenbacher 					     &device->write_requests, __func__,
5724bc9c5c41SAndreas Gruenbacher 					     what, false);
5725b411b363SPhilipp Reisner }
5726b411b363SPhilipp Reisner 
5727bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
5728b411b363SPhilipp Reisner {
57299f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5730b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5731e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5732b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
57332deb8336SPhilipp Reisner 	int size = be32_to_cpu(p->blksize);
573485997675SAndreas Gruenbacher 	int err;
5735b411b363SPhilipp Reisner 
57369f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57379f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57382735a594SAndreas Gruenbacher 		return -EIO;
57399f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5740b411b363SPhilipp Reisner 
574169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5742b411b363SPhilipp Reisner 
5743579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5744b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
5745b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, size);
57462735a594SAndreas Gruenbacher 		return 0;
5747b411b363SPhilipp Reisner 	}
57482deb8336SPhilipp Reisner 
5749b30ab791SAndreas Gruenbacher 	err = validate_req_change_req_state(device, p->block_id, sector,
5750b30ab791SAndreas Gruenbacher 					    &device->write_requests, __func__,
5751303d1448SPhilipp Reisner 					    NEG_ACKED, true);
575285997675SAndreas Gruenbacher 	if (err) {
57532deb8336SPhilipp Reisner 		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
57542deb8336SPhilipp Reisner 		   The master bio might already be completed, therefore the
5755c3afd8f5SAndreas Gruenbacher 		   request is no longer in the collision hash. */
57562deb8336SPhilipp Reisner 		/* In Protocol B we might already have got a P_RECV_ACK
57572deb8336SPhilipp Reisner 		   but then get a P_NEG_ACK afterwards. */
5758b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, sector, size);
57592deb8336SPhilipp Reisner 	}
57602735a594SAndreas Gruenbacher 	return 0;
5761b411b363SPhilipp Reisner }
5762b411b363SPhilipp Reisner 
5763bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5764b411b363SPhilipp Reisner {
57659f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5766b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5767e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5768b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5769b411b363SPhilipp Reisner 
57709f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57719f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57722735a594SAndreas Gruenbacher 		return -EIO;
57739f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
57741952e916SAndreas Gruenbacher 
577569a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
57767be8da07SAndreas Gruenbacher 
5777d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5778b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5779b411b363SPhilipp Reisner 
5780b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5781b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
57828554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5783b411b363SPhilipp Reisner }
5784b411b363SPhilipp Reisner 
5785bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
5786b411b363SPhilipp Reisner {
57879f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5788b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5789b411b363SPhilipp Reisner 	sector_t sector;
5790b411b363SPhilipp Reisner 	int size;
5791e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
57921952e916SAndreas Gruenbacher 
57939f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57949f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57952735a594SAndreas Gruenbacher 		return -EIO;
57969f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5797b411b363SPhilipp Reisner 
5798b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5799b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5800b411b363SPhilipp Reisner 
580169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5802b411b363SPhilipp Reisner 
5803b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5804b411b363SPhilipp Reisner 
5805b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
5806b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5807e05e1e59SAndreas Gruenbacher 		switch (pi->cmd) {
5808d612d309SPhilipp Reisner 		case P_NEG_RS_DREPLY:
5809b30ab791SAndreas Gruenbacher 			drbd_rs_failed_io(device, sector, size);
58106327c911SGustavo A. R. Silva 			break;
5811d612d309SPhilipp Reisner 		case P_RS_CANCEL:
5812d612d309SPhilipp Reisner 			break;
5813d612d309SPhilipp Reisner 		default:
58142735a594SAndreas Gruenbacher 			BUG();
5815d612d309SPhilipp Reisner 		}
5816b30ab791SAndreas Gruenbacher 		put_ldev(device);
5817b411b363SPhilipp Reisner 	}
5818b411b363SPhilipp Reisner 
58192735a594SAndreas Gruenbacher 	return 0;
5820b411b363SPhilipp Reisner }
5821b411b363SPhilipp Reisner 
5822bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
5823b411b363SPhilipp Reisner {
5824e658983aSAndreas Gruenbacher 	struct p_barrier_ack *p = pi->data;
5825c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
58269ed57dcbSLars Ellenberg 	int vnr;
5827b411b363SPhilipp Reisner 
5828bde89a9eSAndreas Gruenbacher 	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
5829b411b363SPhilipp Reisner 
58309ed57dcbSLars Ellenberg 	rcu_read_lock();
5831c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5832c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5833c06ece6bSAndreas Gruenbacher 
5834b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_AHEAD &&
5835b30ab791SAndreas Gruenbacher 		    atomic_read(&device->ap_in_flight) == 0 &&
5836b30ab791SAndreas Gruenbacher 		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5837b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ;
5838b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
5839c4752ef1SPhilipp Reisner 		}
58409ed57dcbSLars Ellenberg 	}
58419ed57dcbSLars Ellenberg 	rcu_read_unlock();
5842c4752ef1SPhilipp Reisner 
58432735a594SAndreas Gruenbacher 	return 0;
5844b411b363SPhilipp Reisner }
5845b411b363SPhilipp Reisner 
5846bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
5847b411b363SPhilipp Reisner {
58489f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5849b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5850e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
585184b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
5852b411b363SPhilipp Reisner 	sector_t sector;
5853b411b363SPhilipp Reisner 	int size;
5854b411b363SPhilipp Reisner 
58559f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58569f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58572735a594SAndreas Gruenbacher 		return -EIO;
58589f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
58591952e916SAndreas Gruenbacher 
5860b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5861b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5862b411b363SPhilipp Reisner 
586369a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5864b411b363SPhilipp Reisner 
5865b411b363SPhilipp Reisner 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5866b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
5867b411b363SPhilipp Reisner 	else
5868b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
5869b411b363SPhilipp Reisner 
5870b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
58712735a594SAndreas Gruenbacher 		return 0;
58721d53f09eSLars Ellenberg 
5873b30ab791SAndreas Gruenbacher 	drbd_rs_complete_io(device, sector);
5874b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5875b411b363SPhilipp Reisner 
5876b30ab791SAndreas Gruenbacher 	--device->ov_left;
5877ea5442afSLars Ellenberg 
5878ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
5879b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
5880b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
5881ea5442afSLars Ellenberg 
5882b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0) {
588384b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(*dw), GFP_NOIO);
588484b8c06bSAndreas Gruenbacher 		if (dw) {
588584b8c06bSAndreas Gruenbacher 			dw->w.cb = w_ov_finished;
588684b8c06bSAndreas Gruenbacher 			dw->device = device;
588784b8c06bSAndreas Gruenbacher 			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5888b411b363SPhilipp Reisner 		} else {
588984b8c06bSAndreas Gruenbacher 			drbd_err(device, "kmalloc(dw) failed.");
5890b30ab791SAndreas Gruenbacher 			ov_out_of_sync_print(device);
5891b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
5892b411b363SPhilipp Reisner 		}
5893b411b363SPhilipp Reisner 	}
5894b30ab791SAndreas Gruenbacher 	put_ldev(device);
58952735a594SAndreas Gruenbacher 	return 0;
5896b411b363SPhilipp Reisner }
5897b411b363SPhilipp Reisner 
5898bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
58990ced55a3SPhilipp Reisner {
59002735a594SAndreas Gruenbacher 	return 0;
59010ced55a3SPhilipp Reisner }
59020ced55a3SPhilipp Reisner 
5903668700b4SPhilipp Reisner struct meta_sock_cmd {
5904b411b363SPhilipp Reisner 	size_t pkt_size;
5905bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5906b411b363SPhilipp Reisner };
5907b411b363SPhilipp Reisner 
5908668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5909668700b4SPhilipp Reisner {
5910668700b4SPhilipp Reisner 	long t;
5911668700b4SPhilipp Reisner 	struct net_conf *nc;
5912668700b4SPhilipp Reisner 
5913668700b4SPhilipp Reisner 	rcu_read_lock();
5914668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5915668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5916668700b4SPhilipp Reisner 	rcu_read_unlock();
5917668700b4SPhilipp Reisner 
5918668700b4SPhilipp Reisner 	t *= HZ;
5919668700b4SPhilipp Reisner 	if (ping_timeout)
5920668700b4SPhilipp Reisner 		t /= 10;
5921668700b4SPhilipp Reisner 
5922668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5923668700b4SPhilipp Reisner }
5924668700b4SPhilipp Reisner 
5925668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5926668700b4SPhilipp Reisner {
5927668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5928668700b4SPhilipp Reisner }
5929668700b4SPhilipp Reisner 
5930668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5931668700b4SPhilipp Reisner {
5932668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5933668700b4SPhilipp Reisner }
5934668700b4SPhilipp Reisner 
5935668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5936e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5937e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5938b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5939b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5940b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5941d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5942b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5943b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5944b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5945b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5946b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5947b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5948b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
594902918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5950d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
59511952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
59521952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
5953b411b363SPhilipp Reisner };
5954b411b363SPhilipp Reisner 
59551c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5956b411b363SPhilipp Reisner {
5957bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5958668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
595977351055SPhilipp Reisner 	struct packet_info pi;
5960668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
5961257d0af6SPhilipp Reisner 	int rv;
5962bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
5963b411b363SPhilipp Reisner 	int received = 0;
5964bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
596552b061a4SAndreas Gruenbacher 	int expect   = header_size;
596644ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
5967b411b363SPhilipp Reisner 
59688b700983SPeter Zijlstra 	sched_set_fifo_low(current);
5969b411b363SPhilipp Reisner 
5970e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
597180822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
597244ed167dSPhilipp Reisner 
5973668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
597444ed167dSPhilipp Reisner 
5975bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5976bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
59771ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
5978841ce241SAndreas Gruenbacher 				goto reconnect;
5979841ce241SAndreas Gruenbacher 			}
5980668700b4SPhilipp Reisner 			set_ping_timeout(connection);
598144ed167dSPhilipp Reisner 			ping_timeout_active = true;
5982b411b363SPhilipp Reisner 		}
5983b411b363SPhilipp Reisner 
5984668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
5985bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5986b411b363SPhilipp Reisner 
5987b411b363SPhilipp Reisner 		/* Note:
5988b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
5989b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
5990b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
5991b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
5992b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
5993b411b363SPhilipp Reisner 		 * rv == expected: full header or command
5994b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
5995b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
5996b411b363SPhilipp Reisner 		 */
5997b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
5998b411b363SPhilipp Reisner 			received += rv;
5999b411b363SPhilipp Reisner 			buf	 += rv;
6000b411b363SPhilipp Reisner 		} else if (rv == 0) {
6001bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6002b66623e3SPhilipp Reisner 				long t;
6003b66623e3SPhilipp Reisner 				rcu_read_lock();
6004bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6005b66623e3SPhilipp Reisner 				rcu_read_unlock();
6006b66623e3SPhilipp Reisner 
6007bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
6008bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6009b66623e3SPhilipp Reisner 						       t);
6010599377acSPhilipp Reisner 				if (t)
6011599377acSPhilipp Reisner 					break;
6012599377acSPhilipp Reisner 			}
60131ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6014b411b363SPhilipp Reisner 			goto reconnect;
6015b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6016cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6017cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6018668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6019cb6518cbSLars Ellenberg 				continue;
6020f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60211ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6022b411b363SPhilipp Reisner 				goto reconnect;
6023b411b363SPhilipp Reisner 			}
6024bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6025b411b363SPhilipp Reisner 			continue;
6026b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6027668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6028668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6029668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6030668700b4SPhilipp Reisner 			flush_signals(current);
6031b411b363SPhilipp Reisner 			continue;
6032b411b363SPhilipp Reisner 		} else {
60331ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6034b411b363SPhilipp Reisner 			goto reconnect;
6035b411b363SPhilipp Reisner 		}
6036b411b363SPhilipp Reisner 
6037b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6038bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6039b411b363SPhilipp Reisner 				goto reconnect;
6040668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6041668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60421ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60432fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6044b411b363SPhilipp Reisner 				goto disconnect;
6045b411b363SPhilipp Reisner 			}
6046e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
604752b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
60481ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
604977351055SPhilipp Reisner 					pi.cmd, pi.size);
6050b411b363SPhilipp Reisner 				goto reconnect;
6051b411b363SPhilipp Reisner 			}
6052257d0af6SPhilipp Reisner 		}
6053b411b363SPhilipp Reisner 		if (received == expect) {
60542735a594SAndreas Gruenbacher 			bool err;
6055a4fbda8eSPhilipp Reisner 
6056bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
60572735a594SAndreas Gruenbacher 			if (err) {
6058d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6059b411b363SPhilipp Reisner 				goto reconnect;
60601952e916SAndreas Gruenbacher 			}
6061b411b363SPhilipp Reisner 
6062bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6063f36af18cSLars Ellenberg 
6064668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6065668700b4SPhilipp Reisner 				set_idle_timeout(connection);
606644ed167dSPhilipp Reisner 				ping_timeout_active = false;
606744ed167dSPhilipp Reisner 			}
6068b411b363SPhilipp Reisner 
6069bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6070b411b363SPhilipp Reisner 			received = 0;
607152b061a4SAndreas Gruenbacher 			expect	 = header_size;
6072b411b363SPhilipp Reisner 			cmd	 = NULL;
6073b411b363SPhilipp Reisner 		}
6074b411b363SPhilipp Reisner 	}
6075b411b363SPhilipp Reisner 
6076b411b363SPhilipp Reisner 	if (0) {
6077b411b363SPhilipp Reisner reconnect:
6078bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6079bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6080b411b363SPhilipp Reisner 	}
6081b411b363SPhilipp Reisner 	if (0) {
6082b411b363SPhilipp Reisner disconnect:
6083bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6084b411b363SPhilipp Reisner 	}
6085b411b363SPhilipp Reisner 
6086668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6087b411b363SPhilipp Reisner 
6088b411b363SPhilipp Reisner 	return 0;
6089b411b363SPhilipp Reisner }
6090668700b4SPhilipp Reisner 
6091668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6092668700b4SPhilipp Reisner {
6093668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6094668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6095668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6096668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6097668700b4SPhilipp Reisner 	struct net_conf *nc;
6098668700b4SPhilipp Reisner 	int tcp_cork, err;
6099668700b4SPhilipp Reisner 
6100668700b4SPhilipp Reisner 	rcu_read_lock();
6101668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6102668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6103668700b4SPhilipp Reisner 	rcu_read_unlock();
6104668700b4SPhilipp Reisner 
6105668700b4SPhilipp Reisner 	if (tcp_cork)
6106db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, true);
6107668700b4SPhilipp Reisner 
6108668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6109668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6110668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6111668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6112668700b4SPhilipp Reisner 
6113668700b4SPhilipp Reisner 	if (err) {
6114668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6115668700b4SPhilipp Reisner 		return;
6116668700b4SPhilipp Reisner 	}
6117668700b4SPhilipp Reisner 
6118668700b4SPhilipp Reisner 	if (tcp_cork)
6119db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, false);
6120668700b4SPhilipp Reisner 
6121668700b4SPhilipp Reisner 	return;
6122668700b4SPhilipp Reisner }
6123