1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
37b411b363SPhilipp Reisner #include "drbd_int.h"
38a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner #include "drbd_vli.h"
41b411b363SPhilipp Reisner 
42f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4320c68fdeSLars Ellenberg 
/* Decoded metadata of one received packet, filled in from its header. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type */
	unsigned int size;	/* payload size from the header */
	unsigned int vnr;	/* volume number */
	void *data;		/* NOTE(review): presumably points into the header/receive buffer — confirm at fill site */
};
5077351055SPhilipp Reisner 
/* Possible outcomes when trying to finish an epoch;
 * returned by drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
56b411b363SPhilipp Reisner 
57bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
58bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5969a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
60a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
61bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6299920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
66b411b363SPhilipp Reisner 
6745bb912bSLars Ellenberg /*
6845bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6945bb912bSLars Ellenberg  * page->private being our "next" pointer.
7045bb912bSLars Ellenberg  */
7145bb912bSLars Ellenberg 
7245bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7345bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
7445bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
7545bb912bSLars Ellenberg  */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n-1 links: "page" ends up on the n-th page of the chain,
	 * "tmp" on its successor, which becomes the new chain head. */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
10645bb912bSLars Ellenberg 
10745bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10845bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10945bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11045bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11145bb912bSLars Ellenberg {
11245bb912bSLars Ellenberg 	struct page *tmp;
11345bb912bSLars Ellenberg 	int i = 1;
114e8628013SJoe Perches 	while ((tmp = page_chain_next(page))) {
115e8628013SJoe Perches 		++i;
116e8628013SJoe Perches 		page = tmp;
117e8628013SJoe Perches 	}
11845bb912bSLars Ellenberg 	if (len)
11945bb912bSLars Ellenberg 		*len = i;
12045bb912bSLars Ellenberg 	return page;
12145bb912bSLars Ellenberg }
12245bb912bSLars Ellenberg 
12345bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12445bb912bSLars Ellenberg {
12545bb912bSLars Ellenberg 	struct page *tmp;
12645bb912bSLars Ellenberg 	int i = 0;
12745bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12845bb912bSLars Ellenberg 		put_page(page);
12945bb912bSLars Ellenberg 		++i;
13045bb912bSLars Ellenberg 	}
13145bb912bSLars Ellenberg 	return i;
13245bb912bSLars Ellenberg }
13345bb912bSLars Ellenberg 
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* Sanity check: chain_last must really be the tail of chain_first. */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
14745bb912bSLars Ellenberg 
/* Try to get @number pages, first from the pre-allocated drbd_pp_pool,
 * then by allocating fresh pages from the kernel.
 * Returns a page chain linked via page->private on success, or NULL if
 * not all @number pages were available; partially allocated pages are
 * given back to the global pool in that case. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the fresh page in front of the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
193b411b363SPhilipp Reisner 
/* Move peer requests from device->net_ee whose pages are no longer in
 * active use onto @to_be_freed.  Caller holds the resource req_lock. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
210b411b363SPhilipp Reisner 
/* Free all net_ee peer requests whose pages are done.
 * Takes and drops the req_lock itself; the actual freeing happens
 * outside the lock (__drbd_free_peer_req may sleep). */
static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
222b411b363SPhilipp Reisner 
/* Run drbd_reclaim_net_peer_reqs() on every device of @connection that
 * still has pages accounted in pp_in_use_by_net. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		/* Pin the device with a kref so we may drop the RCU read
		 * lock (and thus be allowed to sleep) while reclaiming. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
242668700b4SPhilipp Reisner 
243b411b363SPhilipp Reisner /**
244c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
2459b48ff07SLee Jones  * @peer_device:	DRBD device.
24645bb912bSLars Ellenberg  * @number:		number of pages requested
24745bb912bSLars Ellenberg  * @retry:		whether to retry, if not enough pages are available right now
248b411b363SPhilipp Reisner  *
24945bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2500e49d7b0SLars Ellenberg  * the kernel.
25145bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25245bb912bSLars Ellenberg  *
2530e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2540e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2550e49d7b0SLars Ellenberg  *
2560e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2570e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2580e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2590e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2600e49d7b0SLars Ellenberg  *
26145bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
262b411b363SPhilipp Reisner  */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	/* Snapshot max_buffers under RCU; fall back to a huge value if
	 * the net_conf is already gone. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	/* Slow path: wait on drbd_pp_wait until pages are freed
	 * elsewhere, we are signalled, or retry is not requested. */
	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* Slept the full timeout without being woken up: stop
		 * throttling on max-buffers (it is a soft limit, see the
		 * function comment above). */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
313b411b363SPhilipp Reisner 
314c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3150500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31645bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31745bb912bSLars Ellenberg  * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* Account against pp_in_use_by_net for pages that were handed to
	 * the network stack, pp_in_use otherwise. */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* Keep the global pool bounded: beyond the threshold, return the
	 * pages to the system, otherwise put them back on drbd_pp_pool. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* wake up anyone throttled in drbd_alloc_pages() */
	wake_up(&drbd_pp_wait);
}
342b411b363SPhilipp Reisner 
343b411b363SPhilipp Reisner /*
344b411b363SPhilipp Reisner You need to hold the req_lock:
345b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
346b411b363SPhilipp Reisner 
347b411b363SPhilipp Reisner You must not have the req_lock:
3483967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3490db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3507721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
351b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
352a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
353b411b363SPhilipp Reisner  drbd_clear_done_ee()
354b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
355b411b363SPhilipp Reisner */
356b411b363SPhilipp Reisner 
3579104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size)
3589104d31aSLars Ellenberg  * w_same: payload_size == logical_block_size
3599104d31aSLars Ellenberg  * trim: payload_size == 0 */
/* Allocate and initialize a peer request for a request of
 * @request_size bytes at @sector, with @payload_size bytes of page
 * payload (see the normal/w_same/trim comment above).
 * Returns NULL on allocation failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned int nr_pages = PFN_UP(payload_size);

	/* fault injection hook to exercise the failure path */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	/* payload_size == 0 (trim): no pages needed */
	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, &drbd_ee_mempool);
	return NULL;
}
406b411b363SPhilipp Reisner 
/* Release a peer request: digest, pages, mempool element.
 * @is_net: account the freed pages against pp_in_use_by_net instead of
 * pp_in_use.  May sleep. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		/* unexpected: the flag should have been consumed earlier;
		 * complete the activity log I/O here as a fallback */
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, &drbd_ee_mempool);
}
422b411b363SPhilipp Reisner 
/* Free every peer request on @list.  Returns the number freed. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries need pp_in_use_by_net accounting */
	int is_net = list == &device->net_ee;

	/* detach the whole list under the lock, free outside of it */
	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
440b411b363SPhilipp Reisner 
441b411b363SPhilipp Reisner /*
442a990be46SAndreas Gruenbacher  * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
443b411b363SPhilipp Reisner  */
/* Invoke the completion callback of every peer request on done_ee and
 * free them; also reclaim finished net_ee requests along the way.
 * Returns 0, or the first callback error encountered. */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* remember only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
476b411b363SPhilipp Reisner 
/* Wait until @head is empty.  Called with the req_lock held; the lock
 * is dropped while sleeping and re-acquired before each list check and
 * before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
492b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes the req_lock itself. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
500b411b363SPhilipp Reisner 
501dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
502b411b363SPhilipp Reisner {
503b411b363SPhilipp Reisner 	struct kvec iov = {
504b411b363SPhilipp Reisner 		.iov_base = buf,
505b411b363SPhilipp Reisner 		.iov_len = size,
506b411b363SPhilipp Reisner 	};
507b411b363SPhilipp Reisner 	struct msghdr msg = {
508b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
509b411b363SPhilipp Reisner 	};
510*de4eda9dSAl Viro 	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size);
511f7765c36SAl Viro 	return sock_recvmsg(sock, &msg, msg.msg_flags);
512b411b363SPhilipp Reisner }
513b411b363SPhilipp Reisner 
/* Receive up to @size bytes from the data socket into @buf.
 * Returns the number of bytes received, or a negative error code.
 * Anything other than a full read moves the connection to
 * C_BROKEN_PIPE, except for an orderly shutdown right after we
 * ourselves sent a disconnect. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			/* We initiated the disconnect; give the state machine
			 * up to ping_timeo to leave C_WF_REPORT_PARAMS before
			 * declaring the pipe broken. */
			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
546b411b363SPhilipp Reisner 
547bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
548c6967746SAndreas Gruenbacher {
549c6967746SAndreas Gruenbacher 	int err;
550c6967746SAndreas Gruenbacher 
551bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
552c6967746SAndreas Gruenbacher 	if (err != size) {
553c6967746SAndreas Gruenbacher 		if (err >= 0)
554c6967746SAndreas Gruenbacher 			err = -EIO;
555c6967746SAndreas Gruenbacher 	} else
556c6967746SAndreas Gruenbacher 		err = 0;
557c6967746SAndreas Gruenbacher 	return err;
558c6967746SAndreas Gruenbacher }
559c6967746SAndreas Gruenbacher 
560bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
561a5c31904SAndreas Gruenbacher {
562a5c31904SAndreas Gruenbacher 	int err;
563a5c31904SAndreas Gruenbacher 
564bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
565a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5661ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
567a5c31904SAndreas Gruenbacher 	return err;
568a5c31904SAndreas Gruenbacher }
569a5c31904SAndreas Gruenbacher 
5705dbf1673SLars Ellenberg /* quoting tcp(7):
5715dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5725dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5735dbf1673SLars Ellenberg  * This is our wrapper to do so.
5745dbf1673SLars Ellenberg  */
/* Set socket send/receive buffer sizes; 0 leaves a size untouched.
 * The SOCK_*_LOCK bits keep the kernel's buffer auto-tuning from
 * overriding the explicitly configured values. */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
5885dbf1673SLars Ellenberg 
/*
 * Open an outgoing TCP connection to the configured peer address.
 *
 * The socket is explicitly bound to the configured local address (with
 * port 0, so the kernel picks a free source port) before connecting;
 * this is needed on multihomed hosts and when using lo: interfaces.
 *
 * Returns the connected socket, or NULL if no net_conf exists or the
 * attempt failed.  "Soft" failures (timeout, peer not yet reachable,
 * pending signal) leave the connection state alone so the caller can
 * retry; any other error triggers a transition to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the tunables under RCU; net_conf may go away under us. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* clamp against the destination buffer we actually fill
	 * (was sizeof(src_in6); identical size, but misleading) */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	/* buffer sizes must be set before connect(), see tcp(7) */
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
676b411b363SPhilipp Reisner 
/* Context handed to the listen socket's sk_state_change callback so that
 * an incoming connection can wake up drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket we accept from */
	struct completion door_bell;	/* completed when a peer has connected */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored on teardown */

};
6847a426fd8SPhilipp Reisner 
685715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk)
686b411b363SPhilipp Reisner {
6877a426fd8SPhilipp Reisner 	struct accept_wait_data *ad = sk->sk_user_data;
688715306f6SAndreas Gruenbacher 	void (*state_change)(struct sock *sk);
6897a426fd8SPhilipp Reisner 
690715306f6SAndreas Gruenbacher 	state_change = ad->original_sk_state_change;
691715306f6SAndreas Gruenbacher 	if (sk->sk_state == TCP_ESTABLISHED)
6927a426fd8SPhilipp Reisner 		complete(&ad->door_bell);
693715306f6SAndreas Gruenbacher 	state_change(sk);
6947a426fd8SPhilipp Reisner }
6957a426fd8SPhilipp Reisner 
/* Create, configure, and start listening on a socket for incoming peer
 * connections.  On success the socket is stored in ad->s_listen and its
 * sk_state_change callback is redirected so that an incoming connection
 * completes ad->door_bell (see drbd_incoming_connection()).
 * Returns 0 on success, -EIO on any failure. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot the buffer-size tunables under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* per tcp(7), buffer sizes must be set before listen() */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Hook our state-change callback under the callback lock, so that a
	 * connection arriving from here on rings ad->door_bell. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* "soft" errors (signal, would-block) are expected; don't
		 * tear the connection down for those. */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7581f3e509bSPhilipp Reisner 
759715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
760715306f6SAndreas Gruenbacher {
761715306f6SAndreas Gruenbacher 	write_lock_bh(&sk->sk_callback_lock);
762715306f6SAndreas Gruenbacher 	sk->sk_state_change = ad->original_sk_state_change;
763715306f6SAndreas Gruenbacher 	sk->sk_user_data = NULL;
764715306f6SAndreas Gruenbacher 	write_unlock_bh(&sk->sk_callback_lock);
765715306f6SAndreas Gruenbacher }
766715306f6SAndreas Gruenbacher 
/* Wait — with a randomly jittered timeout of roughly connect_int
 * seconds — for an incoming connection on ad->s_listen and accept it.
 * Returns the accepted socket, or NULL on timeout, signal or error. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += prandom_u32_max(2) ? timeo / 7 : -timeo / 7;

	/* ad->door_bell is completed by drbd_incoming_connection() once a
	 * peer connection reaches TCP_ESTABLISHED. */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* The accepted socket inherited our sk_state_change hook from the
	 * listen socket; restore the original callback on it. */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
803b411b363SPhilipp Reisner 
804bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
805b411b363SPhilipp Reisner 
806bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8079f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8089f5bdc33SAndreas Gruenbacher {
809bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8109f5bdc33SAndreas Gruenbacher 		return -EIO;
811bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
814bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
815b411b363SPhilipp Reisner {
816bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
8179f5bdc33SAndreas Gruenbacher 	struct packet_info pi;
8184920e37aSPhilipp Reisner 	struct net_conf *nc;
8199f5bdc33SAndreas Gruenbacher 	int err;
820b411b363SPhilipp Reisner 
8214920e37aSPhilipp Reisner 	rcu_read_lock();
8224920e37aSPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8234920e37aSPhilipp Reisner 	if (!nc) {
8244920e37aSPhilipp Reisner 		rcu_read_unlock();
8254920e37aSPhilipp Reisner 		return -EIO;
8264920e37aSPhilipp Reisner 	}
8274920e37aSPhilipp Reisner 	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
8284920e37aSPhilipp Reisner 	rcu_read_unlock();
8294920e37aSPhilipp Reisner 
830bde89a9eSAndreas Gruenbacher 	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
8319f5bdc33SAndreas Gruenbacher 	if (err != header_size) {
8329f5bdc33SAndreas Gruenbacher 		if (err >= 0)
8339f5bdc33SAndreas Gruenbacher 			err = -EIO;
8349f5bdc33SAndreas Gruenbacher 		return err;
8359f5bdc33SAndreas Gruenbacher 	}
836bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, connection->data.rbuf, &pi);
8379f5bdc33SAndreas Gruenbacher 	if (err)
8389f5bdc33SAndreas Gruenbacher 		return err;
8399f5bdc33SAndreas Gruenbacher 	return pi.cmd;
840b411b363SPhilipp Reisner }
841b411b363SPhilipp Reisner 
842b411b363SPhilipp Reisner /**
843b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
844b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
845b411b363SPhilipp Reisner  */
8465d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
847b411b363SPhilipp Reisner {
848b411b363SPhilipp Reisner 	int rr;
849b411b363SPhilipp Reisner 	char tb[4];
850b411b363SPhilipp Reisner 
851b411b363SPhilipp Reisner 	if (!*sock)
85281e84650SAndreas Gruenbacher 		return false;
853b411b363SPhilipp Reisner 
854dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
855b411b363SPhilipp Reisner 
856b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85781e84650SAndreas Gruenbacher 		return true;
858b411b363SPhilipp Reisner 	} else {
859b411b363SPhilipp Reisner 		sock_release(*sock);
860b411b363SPhilipp Reisner 		*sock = NULL;
86181e84650SAndreas Gruenbacher 		return false;
862b411b363SPhilipp Reisner 	}
863b411b363SPhilipp Reisner }
8645d0b17f1SPhilipp Reisner 
8655d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8665d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8675d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8685d0b17f1SPhilipp Reisner {
8695d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8705d0b17f1SPhilipp Reisner 	int timeout;
8715d0b17f1SPhilipp Reisner 	bool ok;
8725d0b17f1SPhilipp Reisner 
8735d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8745d0b17f1SPhilipp Reisner 		return false;
8755d0b17f1SPhilipp Reisner 
8765d0b17f1SPhilipp Reisner 	rcu_read_lock();
8775d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8785d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8795d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8805d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8815d0b17f1SPhilipp Reisner 
8825d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8835d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8845d0b17f1SPhilipp Reisner 
8855d0b17f1SPhilipp Reisner 	return ok;
8865d0b17f1SPhilipp Reisner }
8875d0b17f1SPhilipp Reisner 
8882325eb66SPhilipp Reisner /* Gets called if a connection is established, or if a new minor gets created
8892325eb66SPhilipp Reisner    in a connection */
89069a22773SAndreas Gruenbacher int drbd_connected(struct drbd_peer_device *peer_device)
891907599e0SPhilipp Reisner {
89269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
8930829f5edSAndreas Gruenbacher 	int err;
894907599e0SPhilipp Reisner 
895b30ab791SAndreas Gruenbacher 	atomic_set(&device->packet_seq, 0);
896b30ab791SAndreas Gruenbacher 	device->peer_seq = 0;
897907599e0SPhilipp Reisner 
89869a22773SAndreas Gruenbacher 	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
89969a22773SAndreas Gruenbacher 		&peer_device->connection->cstate_mutex :
900b30ab791SAndreas Gruenbacher 		&device->own_state_mutex;
9018410da8fSPhilipp Reisner 
90269a22773SAndreas Gruenbacher 	err = drbd_send_sync_param(peer_device);
9030829f5edSAndreas Gruenbacher 	if (!err)
90469a22773SAndreas Gruenbacher 		err = drbd_send_sizes(peer_device, 0, 0);
9050829f5edSAndreas Gruenbacher 	if (!err)
90669a22773SAndreas Gruenbacher 		err = drbd_send_uuids(peer_device);
9070829f5edSAndreas Gruenbacher 	if (!err)
90869a22773SAndreas Gruenbacher 		err = drbd_send_current_state(peer_device);
909b30ab791SAndreas Gruenbacher 	clear_bit(USE_DEGR_WFC_T, &device->flags);
910b30ab791SAndreas Gruenbacher 	clear_bit(RESIZE_PENDING, &device->flags);
911b30ab791SAndreas Gruenbacher 	atomic_set(&device->ap_in_flight, 0);
912b30ab791SAndreas Gruenbacher 	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
9130829f5edSAndreas Gruenbacher 	return err;
914907599e0SPhilipp Reisner }
915b411b363SPhilipp Reisner 
/*
 * Establish both DRBD sockets (data and meta) to the peer and perform
 * the feature/authentication/state handshake.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Alternate between actively connecting and accepting incoming
	 * connections until both the data (sock) and meta (msock) sockets
	 * are established and survive the settle check. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected simultaneously;
					 * keep the newly accepted one */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break symmetry between the two nodes */
				if (prandom_u32_max(2))
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* Both sockets are up; the listen socket is no longer needed. */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* Negotiate the protocol version (P_CONNECTION_FEATURES exchange). */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* Per-device connect handshake.  The kref keeps the device alive
	 * while we temporarily drop the RCU read lock around the
	 * (possibly sleeping) drbd_connected() call. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1155b411b363SPhilipp Reisner 
1156bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1157b411b363SPhilipp Reisner {
1158bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1159b411b363SPhilipp Reisner 
11600c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11610c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11620c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11630c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11641ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11650c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116602918be2SPhilipp Reisner 		}
11670c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11680c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11690c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11700c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1171e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1172e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1173e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1174b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1175eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1176e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1177e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1178e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1179e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1180e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
118177351055SPhilipp Reisner 		pi->vnr = 0;
118202918be2SPhilipp Reisner 	} else {
11831ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1184e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1185bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11868172f3e9SAndreas Gruenbacher 		return -EINVAL;
1187b411b363SPhilipp Reisner 	}
1188e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11898172f3e9SAndreas Gruenbacher 	return 0;
1190b411b363SPhilipp Reisner }
1191b411b363SPhilipp Reisner 
1192c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1193c51a0ef3SLars Ellenberg {
1194c51a0ef3SLars Ellenberg 	if (current->plug == &connection->receiver_plug) {
1195c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
1196c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
1197c51a0ef3SLars Ellenberg 	} /* else: maybe just schedule() ?? */
1198c51a0ef3SLars Ellenberg }
1199c51a0ef3SLars Ellenberg 
1200bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1201257d0af6SPhilipp Reisner {
1202bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120369bc7bc3SAndreas Gruenbacher 	int err;
1204257d0af6SPhilipp Reisner 
1205bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1206a5c31904SAndreas Gruenbacher 	if (err)
120769bc7bc3SAndreas Gruenbacher 		return err;
1208257d0af6SPhilipp Reisner 
1209bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1210bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1211b411b363SPhilipp Reisner 
121269bc7bc3SAndreas Gruenbacher 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first try a non-blocking receive.  If no
 * complete header is immediately available, use the idle moment to
 * unplug the backend queues and ack received data to the peer's TCP,
 * then block for the remainder of the header. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	/* non-blocking attempt first; returns number of bytes, or -EAGAIN
	 * if nothing is queued on the socket right now */
	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* partial header received: account for it, then block for
		 * the rest */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;		/* flush bios in flight; +1 held while submitting */
	int error;			/* any errno recorded by a completion */
	struct completion done;		/* signalled when pending drops to zero */
};
struct one_flush_context {
	struct drbd_device *device;	/* device this flush bio was issued for */
	struct issue_flush_context *ctx;	/* shared bookkeeping for the whole batch */
};
1258f9ff0da5SLars Ellenberg 
/* Completion handler for one flush bio submitted by submit_one_flush().
 * Records any error in the shared context, drops the references taken
 * at submit time, and wakes the waiter in drbd_flush() once the last
 * pending flush has completed. */
static void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_status) {
		/* remember (one of) the error(s); drbd_flush() degrades the
		 * write ordering method when it sees ctx->error set */
		ctx->error = blk_status_to_errno(bio->bi_status);
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
	}
	kfree(octx);
	bio_put(bio);

	/* drop the per-device state/references taken in submit_one_flush() */
	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	kref_put(&device->kref, drbd_destroy_device);

	/* last completion wakes up the waiter */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1279f9ff0da5SLars Ellenberg 
/* Allocate and submit one REQ_PREFLUSH bio to @device's backing device,
 * accounted in @ctx.  On failure to allocate the private context,
 * record -ENOMEM in @ctx and drop the ldev/kref references the caller
 * (drbd_flush()) took on our behalf. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
				    REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);

	if (!octx) {
		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	atomic_inc(&ctx->pending);	/* dropped again in one_flush_endio() */
	submit_bio(bio);
}
1309f9ff0da5SLars Ellenberg 
/* Submit flushes to the backing devices of all devices of @connection
 * in parallel and wait for all of them to complete.  Only active when
 * the resource's write ordering method is at least WO_BDEV_FLUSH; on
 * any flush error, fall back to WO_DRAIN_IO. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* start pending at 1 so early completions cannot signal
		 * "done" before all flushes have been submitted */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* submit_one_flush() may sleep (GFP_NOIO allocations),
			 * so drop the RCU read lock around it; the device is
			 * pinned by the kref and ldev references taken above */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1350b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished once it is non-empty, has no active requests
 * left, and either got its barrier number or we are cleaning up
 * (EV_CLEANUP).  Finishing an epoch may cascade to its successor in
 * the epoch list.
 *
 * Returns FE_STILL_LIVE if no epoch was finished, FE_RECYCLED if the
 * current epoch was reset for reuse, or FE_DESTROYED if at least one
 * epoch was freed.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the spinlock around the network send */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* not the current epoch: unlink and free it,
				 * then continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* current epoch: reset it for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1426b411b363SPhilipp Reisner 
14278fe39aacSPhilipp Reisner static enum write_ordering_e
14288fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14298fe39aacSPhilipp Reisner {
14308fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14318fe39aacSPhilipp Reisner 
14328fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14338fe39aacSPhilipp Reisner 
1434f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1435f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14388fe39aacSPhilipp Reisner 
14398fe39aacSPhilipp Reisner 	return wo;
14408fe39aacSPhilipp Reisner }
14418fe39aacSPhilipp Reisner 
14429b48ff07SLee Jones /*
1443b411b363SPhilipp Reisner  * drbd_bump_write_ordering() - Fall back to an other write ordering method
1444b411b363SPhilipp Reisner  * @wo:		Write ordering method to try.
1445b411b363SPhilipp Reisner  */
14468fe39aacSPhilipp Reisner void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
14478fe39aacSPhilipp Reisner 			      enum write_ordering_e wo)
1448b411b363SPhilipp Reisner {
1449e9526580SPhilipp Reisner 	struct drbd_device *device;
1450b411b363SPhilipp Reisner 	enum write_ordering_e pwo;
14514b0007c0SPhilipp Reisner 	int vnr;
1452b411b363SPhilipp Reisner 	static char *write_ordering_str[] = {
1453f6ba8636SAndreas Gruenbacher 		[WO_NONE] = "none",
1454f6ba8636SAndreas Gruenbacher 		[WO_DRAIN_IO] = "drain",
1455f6ba8636SAndreas Gruenbacher 		[WO_BDEV_FLUSH] = "flush",
1456b411b363SPhilipp Reisner 	};
1457b411b363SPhilipp Reisner 
1458e9526580SPhilipp Reisner 	pwo = resource->write_ordering;
1459f6ba8636SAndreas Gruenbacher 	if (wo != WO_BDEV_FLUSH)
1460b411b363SPhilipp Reisner 		wo = min(pwo, wo);
1461daeda1ccSPhilipp Reisner 	rcu_read_lock();
1462e9526580SPhilipp Reisner 	idr_for_each_entry(&resource->devices, device, vnr) {
14638fe39aacSPhilipp Reisner 		if (get_ldev(device)) {
14648fe39aacSPhilipp Reisner 			wo = max_allowed_wo(device->ldev, wo);
14658fe39aacSPhilipp Reisner 			if (device->ldev == bdev)
14668fe39aacSPhilipp Reisner 				bdev = NULL;
1467b30ab791SAndreas Gruenbacher 			put_ldev(device);
14684b0007c0SPhilipp Reisner 		}
14698fe39aacSPhilipp Reisner 	}
14708fe39aacSPhilipp Reisner 
14718fe39aacSPhilipp Reisner 	if (bdev)
14728fe39aacSPhilipp Reisner 		wo = max_allowed_wo(bdev, wo);
14738fe39aacSPhilipp Reisner 
147470df7092SLars Ellenberg 	rcu_read_unlock();
147570df7092SLars Ellenberg 
1476e9526580SPhilipp Reisner 	resource->write_ordering = wo;
1477f6ba8636SAndreas Gruenbacher 	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
1478e9526580SPhilipp Reisner 		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
1479b411b363SPhilipp Reisner }
1480b411b363SPhilipp Reisner 
1481f31e583aSLars Ellenberg /*
1482f31e583aSLars Ellenberg  * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1483f31e583aSLars Ellenberg  * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1484f31e583aSLars Ellenberg  * will directly go to fallback mode, submitting normal writes, and
1485f31e583aSLars Ellenberg  * never even try to UNMAP.
1486f31e583aSLars Ellenberg  *
1487f31e583aSLars Ellenberg  * And dm-thin does not do this (yet), mostly because in general it has
1488f31e583aSLars Ellenberg  * to assume that "skip_block_zeroing" is set.  See also:
1489f31e583aSLars Ellenberg  * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1490f31e583aSLars Ellenberg  * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1491f31e583aSLars Ellenberg  *
1492f31e583aSLars Ellenberg  * We *may* ignore the discard-zeroes-data setting, if so configured.
1493f31e583aSLars Ellenberg  *
1494f31e583aSLars Ellenberg  * Assumption is that this "discard_zeroes_data=0" is only because the backend
1495f31e583aSLars Ellenberg  * may ignore partial unaligned discards.
1496f31e583aSLars Ellenberg  *
1497f31e583aSLars Ellenberg  * LVM/DM thin as of at least
1498f31e583aSLars Ellenberg  *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
1499f31e583aSLars Ellenberg  *   Library version: 1.02.93-RHEL7 (2015-01-28)
1500f31e583aSLars Ellenberg  *   Driver version:  4.29.0
1501f31e583aSLars Ellenberg  * still behaves this way.
1502f31e583aSLars Ellenberg  *
1503f31e583aSLars Ellenberg  * For unaligned (wrt. alignment and granularity) or too small discards,
1504f31e583aSLars Ellenberg  * we zero-out the initial (and/or) trailing unaligned partial chunks,
1505f31e583aSLars Ellenberg  * but discard all the aligned full chunks.
1506f31e583aSLars Ellenberg  *
1507f31e583aSLars Ellenberg  * At least for LVM/DM thin, with skip_block_zeroing=false,
1508f31e583aSLars Ellenberg  * the result is effectively "discard_zeroes_data=1".
1509f31e583aSLars Ellenberg  */
/* flags: EE_TRIM|EE_ZEROOUT */
/* Discard and/or zero out @nr_sectors starting at @start on @device's
 * backing device, per the strategy described in the big comment above:
 * unaligned head/tail chunks are zeroed, aligned full chunks are
 * discarded.  Returns 0 on success, 1 if any submitted request failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* explicit zero-out requested, or no discard requested at all */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap at 1 << 22 sectors (2 GiB), rounded down to the granularity */
	max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		/* start is not aligned: zero out the partial head chunk */
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* discard the aligned middle in max-sized chunks */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
					    GFP_NOIO);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* whatever is left over (tail, or everything) gets zeroed out */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1575f31e583aSLars Ellenberg 
1576f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1577f31e583aSLars Ellenberg {
1578f31e583aSLars Ellenberg 	struct disk_conf *dc;
1579f31e583aSLars Ellenberg 	bool can_do;
1580f31e583aSLars Ellenberg 
158170200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
1582f31e583aSLars Ellenberg 		return false;
1583f31e583aSLars Ellenberg 
1584f31e583aSLars Ellenberg 	rcu_read_lock();
1585f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1586f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1587f31e583aSLars Ellenberg 	rcu_read_unlock();
1588f31e583aSLars Ellenberg 	return can_do;
1589f31e583aSLars Ellenberg }
1590f31e583aSLars Ellenberg 
1591f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1592f31e583aSLars Ellenberg {
1593f31e583aSLars Ellenberg 	/* If the backend cannot discard, or does not guarantee
1594f31e583aSLars Ellenberg 	 * read-back zeroes in discarded ranges, we fall back to
1595f31e583aSLars Ellenberg 	 * zero-out.  Unless configuration specifically requested
1596f31e583aSLars Ellenberg 	 * otherwise. */
1597f31e583aSLars Ellenberg 	if (!can_do_reliable_discards(device))
1598f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
1599f31e583aSLars Ellenberg 
1600f31e583aSLars Ellenberg 	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1601f31e583aSLars Ellenberg 	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1602dd4f699dSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
1603dd4f699dSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
1604dd4f699dSLars Ellenberg }
1605dd4f699dSLars Ellenberg 
/**
 * drbd_submit_peer_request() - submit the backing-device I/O for a peer request
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @opf:	request operation and flags (REQ_OP_* | REQ_* flag bits)
 * @fault_type:	fault injection class, passed on to drbd_submit_bio_noacct()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const blk_opf_t opf, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned int data_size = peer_req->i.size;
	unsigned int n_bios = 0;
	unsigned int nr_pages = PFN_UP(data_size);

	/* TRIM/DISCARD: for now, always use the helper function
	 * blkdev_issue_zeroout(..., discard=true).
	 * It's synchronous, but it does the right thing wrt. bio splitting.
	 * Correctness first, performance later.  Next step is to code an
	 * asynchronous variant of the same.
	 */
	if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request from receive_rs_deallocated(),
		 * it is already on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		drbd_issue_peer_discard_or_zero_out(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO);
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the single-linked chain; submitted front-first below */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		/* current bio full: start another one for the rest */
		if (!bio_add_page(bio, page, len, 0))
			goto next_bio;
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_submit_bio_noacct(device, fault_type, bio);
	} while (bios);
	return 0;
}
170445bb912bSLars Ellenberg 
/* Unhook a peer request from write-conflict detection: remove its
 * interval from device->write_requests and clear it, then wake anyone
 * sleeping on device->misc_wait for this interval.
 * NOTE(review): callers visible in this file invoke this under
 * device->resource->req_lock -- confirm the interval tree requires it. */
1705b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1706db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
170753840641SAndreas Gruenbacher {
1708db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
170953840641SAndreas Gruenbacher 
1710b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
171153840641SAndreas Gruenbacher 	drbd_clear_interval(i);
171253840641SAndreas Gruenbacher 
17136c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
171453840641SAndreas Gruenbacher 	if (i->waiting)
1715b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
171653840641SAndreas Gruenbacher }
171753840641SAndreas Gruenbacher 
/* Wait until the active_ee list of every volume (peer device) on this
 * connection is empty, i.e. all in-flight peer writes have completed
 * locally.  We must drop the RCU read lock before sleeping in
 * drbd_wait_ee_list_empty(); a kref on the device keeps it alive across
 * that window. */
1718bde89a9eSAndreas Gruenbacher static void conn_wait_active_ee_empty(struct drbd_connection *connection)
171977fede51SPhilipp Reisner {
1720c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
172177fede51SPhilipp Reisner 	int vnr;
172277fede51SPhilipp Reisner 
172377fede51SPhilipp Reisner 	rcu_read_lock();
1724c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1725c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
1726c06ece6bSAndreas Gruenbacher 
	/* Pin the device, leave the RCU read side so we may sleep. */
1727b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
172877fede51SPhilipp Reisner 		rcu_read_unlock();
1729b30ab791SAndreas Gruenbacher 		drbd_wait_ee_list_empty(device, &device->active_ee);
173005a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
173177fede51SPhilipp Reisner 		rcu_read_lock();
173277fede51SPhilipp Reisner 	}
173377fede51SPhilipp Reisner 	rcu_read_unlock();
173477fede51SPhilipp Reisner }
173577fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: close the current write epoch.
 * Depending on the resource's write-ordering mode we may first have to
 * drain and/or flush outstanding peer writes; a fresh epoch object is
 * then installed as connection->current_epoch.  The P_BARRIER_ACK is
 * sent later, once the epoch is actually finished (see comment below).
 * Returns 0 on success, -EIO on an invalid write-ordering value. */
1736bde89a9eSAndreas Gruenbacher static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
1737b411b363SPhilipp Reisner {
17382451fc3bSPhilipp Reisner 	int rv;
1739e658983aSAndreas Gruenbacher 	struct p_barrier *p = pi->data;
1740b411b363SPhilipp Reisner 	struct drbd_epoch *epoch;
1741b411b363SPhilipp Reisner 
17429ed57dcbSLars Ellenberg 	/* FIXME these are unacked on connection,
17439ed57dcbSLars Ellenberg 	 * not a specific (peer)device.
17449ed57dcbSLars Ellenberg 	 */
1745bde89a9eSAndreas Gruenbacher 	connection->current_epoch->barrier_nr = p->barrier;
1746bde89a9eSAndreas Gruenbacher 	connection->current_epoch->connection = connection;
1747bde89a9eSAndreas Gruenbacher 	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
1748b411b363SPhilipp Reisner 
1749b411b363SPhilipp Reisner 	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1750b411b363SPhilipp Reisner 	 * the activity log, which means it would not be resynced in case the
1751b411b363SPhilipp Reisner 	 * R_PRIMARY crashes now.
1752b411b363SPhilipp Reisner 	 * Therefore we must send the barrier_ack after the barrier request was
1753b411b363SPhilipp Reisner 	 * completed. */
1754e9526580SPhilipp Reisner 	switch (connection->resource->write_ordering) {
1755f6ba8636SAndreas Gruenbacher 	case WO_NONE:
1756b411b363SPhilipp Reisner 		if (rv == FE_RECYCLED)
175782bc0194SAndreas Gruenbacher 			return 0;
1758b411b363SPhilipp Reisner 
1759b411b363SPhilipp Reisner 		/* receiver context, in the writeout path of the other node.
1760b411b363SPhilipp Reisner 		 * avoid potential distributed deadlock */
1761b411b363SPhilipp Reisner 		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
17622451fc3bSPhilipp Reisner 		if (epoch)
17632451fc3bSPhilipp Reisner 			break;
17642451fc3bSPhilipp Reisner 		else
17651ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
	/* Allocation failed: degrade to drain-and-flush behavior below. */
1766df561f66SGustavo A. R. Silva 		fallthrough;
17672451fc3bSPhilipp Reisner 
1768f6ba8636SAndreas Gruenbacher 	case WO_BDEV_FLUSH:
1769f6ba8636SAndreas Gruenbacher 	case WO_DRAIN_IO:
	/* Wait for all in-flight peer writes, then flush the backing devices. */
1770bde89a9eSAndreas Gruenbacher 		conn_wait_active_ee_empty(connection);
1771bde89a9eSAndreas Gruenbacher 		drbd_flush(connection);
17722451fc3bSPhilipp Reisner 
	/* Only a non-empty current epoch needs a successor object. */
1773bde89a9eSAndreas Gruenbacher 		if (atomic_read(&connection->current_epoch->epoch_size)) {
17742451fc3bSPhilipp Reisner 			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
17752451fc3bSPhilipp Reisner 			if (epoch)
17762451fc3bSPhilipp Reisner 				break;
1777b411b363SPhilipp Reisner 		}
1778b411b363SPhilipp Reisner 
177982bc0194SAndreas Gruenbacher 		return 0;
17802451fc3bSPhilipp Reisner 	default:
1781e9526580SPhilipp Reisner 		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1782e9526580SPhilipp Reisner 			 connection->resource->write_ordering);
178382bc0194SAndreas Gruenbacher 		return -EIO;
1784b411b363SPhilipp Reisner 	}
1785b411b363SPhilipp Reisner 
	/* Initialize the fresh epoch and install it under epoch_lock. */
1786b411b363SPhilipp Reisner 	epoch->flags = 0;
1787b411b363SPhilipp Reisner 	atomic_set(&epoch->epoch_size, 0);
1788b411b363SPhilipp Reisner 	atomic_set(&epoch->active, 0);
1789b411b363SPhilipp Reisner 
1790bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
1791bde89a9eSAndreas Gruenbacher 	if (atomic_read(&connection->current_epoch->epoch_size)) {
1792bde89a9eSAndreas Gruenbacher 		list_add(&epoch->list, &connection->current_epoch->list);
1793bde89a9eSAndreas Gruenbacher 		connection->current_epoch = epoch;
1794bde89a9eSAndreas Gruenbacher 		connection->epochs++;
1795b411b363SPhilipp Reisner 	} else {
1796b411b363SPhilipp Reisner 		/* The current_epoch got recycled while we allocated this one... */
1797b411b363SPhilipp Reisner 		kfree(epoch);
1798b411b363SPhilipp Reisner 	}
1799bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
1800b411b363SPhilipp Reisner 
180182bc0194SAndreas Gruenbacher 	return 0;
1802b411b363SPhilipp Reisner }
1803b411b363SPhilipp Reisner 
18049104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
/* Temporarily override r->i.size with the actual payload size so that
 * drbd_csum_ee() digests exactly the bytes that were transmitted, then
 * restore the original request size. */
18053d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18069104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18079104d31aSLars Ellenberg 			      unsigned int payload_size)
18089104d31aSLars Ellenberg {
18099104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18109104d31aSLars Ellenberg 	r->i.size = payload_size;
18119104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18129104d31aSLars Ellenberg 	r->i.size = tmp;
18139104d31aSLars Ellenberg }
18149104d31aSLars Ellenberg 
1815b411b363SPhilipp Reisner /* used from receive_RSDataReply (recv_resync_read)
18169104d31aSLars Ellenberg  * and from receive_Data.
18179104d31aSLars Ellenberg  * data_size: actual payload ("data in")
18189104d31aSLars Ellenberg  * 	for normal writes that is bi_size.
18199104d31aSLars Ellenberg  * 	for discards, that is zero.
18209104d31aSLars Ellenberg  * 	for write same, it is logical_block_size.
18219104d31aSLars Ellenberg  * both trim and write same have the bi_size ("data len to be affected")
18229104d31aSLars Ellenberg  * as extra argument in the packet header.
18239104d31aSLars Ellenberg  */
/* Allocates and fills a peer request from the socket.  Returns NULL on
 * any error (allocation, short read, validation or digest failure);
 * callers treat NULL as a reason to tear down the connection. */
1824f6ffca9fSAndreas Gruenbacher static struct drbd_peer_request *
182569a22773SAndreas Gruenbacher read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1826a0fb3c47SLars Ellenberg 	      struct packet_info *pi) __must_hold(local)
1827b411b363SPhilipp Reisner {
182869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1829155bd9d1SChristoph Hellwig 	const sector_t capacity = get_capacity(device->vdisk);
1830db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
1831b411b363SPhilipp Reisner 	struct page *page;
183211f8b2b6SAndreas Gruenbacher 	int digest_size, err;
183311f8b2b6SAndreas Gruenbacher 	unsigned int data_size = pi->size, ds;
183469a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
183569a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
18366b4388acSPhilipp Reisner 	unsigned long *data;
1837a0fb3c47SLars Ellenberg 	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1838f31e583aSLars Ellenberg 	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
1839b411b363SPhilipp Reisner 
	/* If data integrity is enabled, the digest precedes the payload. */
184011f8b2b6SAndreas Gruenbacher 	digest_size = 0;
1841a0fb3c47SLars Ellenberg 	if (!trim && peer_device->connection->peer_integrity_tfm) {
18423d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
18439f5bdc33SAndreas Gruenbacher 		/*
18449f5bdc33SAndreas Gruenbacher 		 * FIXME: Receive the incoming digest into the receive buffer
18459f5bdc33SAndreas Gruenbacher 		 *	  here, together with its struct p_data?
18469f5bdc33SAndreas Gruenbacher 		 */
184711f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1848a5c31904SAndreas Gruenbacher 		if (err)
1849b411b363SPhilipp Reisner 			return NULL;
185011f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
185188104ca4SAndreas Gruenbacher 	}
1852b411b363SPhilipp Reisner 
1853a34592ffSChristoph Hellwig 	/* assume request_size == data_size, but special case trim. */
18549104d31aSLars Ellenberg 	ds = data_size;
1855a0fb3c47SLars Ellenberg 	if (trim) {
18569104d31aSLars Ellenberg 		if (!expect(data_size == 0))
18579104d31aSLars Ellenberg 			return NULL;
18589104d31aSLars Ellenberg 		ds = be32_to_cpu(trim->size);
1859f31e583aSLars Ellenberg 	} else if (zeroes) {
1860f31e583aSLars Ellenberg 		if (!expect(data_size == 0))
1861f31e583aSLars Ellenberg 			return NULL;
1862f31e583aSLars Ellenberg 		ds = be32_to_cpu(zeroes->size);
1863a0fb3c47SLars Ellenberg 	}
1864a0fb3c47SLars Ellenberg 
	/* Sanity-check the (peer-supplied) request size. */
18659104d31aSLars Ellenberg 	if (!expect(IS_ALIGNED(ds, 512)))
1866841ce241SAndreas Gruenbacher 		return NULL;
1867a34592ffSChristoph Hellwig 	if (trim || zeroes) {
18689104d31aSLars Ellenberg 		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
18699104d31aSLars Ellenberg 			return NULL;
18709104d31aSLars Ellenberg 	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
1871841ce241SAndreas Gruenbacher 		return NULL;
1872b411b363SPhilipp Reisner 
18736666032aSLars Ellenberg 	/* even though we trust out peer,
18746666032aSLars Ellenberg 	 * we sometimes have to double check. */
18759104d31aSLars Ellenberg 	if (sector + (ds>>9) > capacity) {
1876d0180171SAndreas Gruenbacher 		drbd_err(device, "request from peer beyond end of local disk: "
1877fdda6544SLars Ellenberg 			"capacity: %llus < sector: %llus + size: %u\n",
18786666032aSLars Ellenberg 			(unsigned long long)capacity,
18799104d31aSLars Ellenberg 			(unsigned long long)sector, ds);
18806666032aSLars Ellenberg 		return NULL;
18816666032aSLars Ellenberg 	}
18826666032aSLars Ellenberg 
1883b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1884b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
1885b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
18869104d31aSLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
1887db830c46SAndreas Gruenbacher 	if (!peer_req)
1888b411b363SPhilipp Reisner 		return NULL;
188945bb912bSLars Ellenberg 
	/* trim/zeroout carry no payload; flags tell the submit path what to do */
189021ae5d7fSLars Ellenberg 	peer_req->flags |= EE_WRITE;
18919104d31aSLars Ellenberg 	if (trim) {
1892f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
1893f31e583aSLars Ellenberg 		return peer_req;
1894f31e583aSLars Ellenberg 	}
1895f31e583aSLars Ellenberg 	if (zeroes) {
1896f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
189781a3537aSLars Ellenberg 		return peer_req;
18989104d31aSLars Ellenberg 	}
1899a73ff323SLars Ellenberg 
19009104d31aSLars Ellenberg 	/* receive payload size bytes into page chain */
1901b411b363SPhilipp Reisner 	ds = data_size;
1902db830c46SAndreas Gruenbacher 	page = peer_req->pages;
190345bb912bSLars Ellenberg 	page_chain_for_each(page) {
190445bb912bSLars Ellenberg 		unsigned len = min_t(int, ds, PAGE_SIZE);
19056b4388acSPhilipp Reisner 		data = kmap(page);
190669a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
	/* Optional fault injection: deliberately corrupt received data. */
1907b30ab791SAndreas Gruenbacher 		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
1908d0180171SAndreas Gruenbacher 			drbd_err(device, "Fault injection: Corrupting data on receive\n");
19096b4388acSPhilipp Reisner 			data[0] = data[0] ^ (unsigned long)-1;
19106b4388acSPhilipp Reisner 		}
1911b411b363SPhilipp Reisner 		kunmap(page);
1912a5c31904SAndreas Gruenbacher 		if (err) {
1913b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1914b411b363SPhilipp Reisner 			return NULL;
1915b411b363SPhilipp Reisner 		}
1916a5c31904SAndreas Gruenbacher 		ds -= len;
1917b411b363SPhilipp Reisner 	}
1918b411b363SPhilipp Reisner 
	/* Verify the payload against the digest received ahead of the data. */
191911f8b2b6SAndreas Gruenbacher 	if (digest_size) {
19209104d31aSLars Ellenberg 		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
192111f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
1922d0180171SAndreas Gruenbacher 			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
1923470be44aSLars Ellenberg 				(unsigned long long)sector, data_size);
1924b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1925b411b363SPhilipp Reisner 			return NULL;
1926b411b363SPhilipp Reisner 		}
1927b411b363SPhilipp Reisner 	}
1928b30ab791SAndreas Gruenbacher 	device->recv_cnt += data_size >> 9;
1929db830c46SAndreas Gruenbacher 	return peer_req;
1930b411b363SPhilipp Reisner }
1931b411b363SPhilipp Reisner 
1932b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1933b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1934b411b363SPhilipp Reisner  */
/* Returns 0 on success, or a negative error code (receive failure or
 * -ENOMEM when no scratch page could be obtained). */
193569a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1936b411b363SPhilipp Reisner {
1937b411b363SPhilipp Reisner 	struct page *page;
1938a5c31904SAndreas Gruenbacher 	int err = 0;
1939b411b363SPhilipp Reisner 	void *data;
1940b411b363SPhilipp Reisner 
1941c3470cdeSLars Ellenberg 	if (!data_size)
1942fc5be839SAndreas Gruenbacher 		return 0;
1943c3470cdeSLars Ellenberg 
194469a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
	/* drbd_alloc_pages() may return NULL (e.g. when interrupted);
	 * without this check we would dereference NULL in kmap() below. */
	if (!page)
		return -ENOMEM;
1945b411b363SPhilipp Reisner 
1946b411b363SPhilipp Reisner 	data = kmap(page);
	/* Repeatedly read into the single scratch page until the whole
	 * block has been consumed from the socket. */
1947b411b363SPhilipp Reisner 	while (data_size) {
1948fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1949fc5be839SAndreas Gruenbacher 
195069a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1951a5c31904SAndreas Gruenbacher 		if (err)
1952b411b363SPhilipp Reisner 			break;
1953a5c31904SAndreas Gruenbacher 		data_size -= len;
1954b411b363SPhilipp Reisner 	}
1955b411b363SPhilipp Reisner 	kunmap(page);
195669a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
1957fc5be839SAndreas Gruenbacher 	return err;
1958b411b363SPhilipp Reisner }
1959b411b363SPhilipp Reisner 
/* Receive a disk-less read reply: copy the payload from the socket
 * straight into the master bio of the request 'req' we sent out,
 * verifying the optional integrity digest first received ahead of the
 * data.  Returns 0 on success, a negative error otherwise. */
196069a22773SAndreas Gruenbacher static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
1961b411b363SPhilipp Reisner 			   sector_t sector, int data_size)
1962b411b363SPhilipp Reisner {
19637988613bSKent Overstreet 	struct bio_vec bvec;
19647988613bSKent Overstreet 	struct bvec_iter iter;
1965b411b363SPhilipp Reisner 	struct bio *bio;
196611f8b2b6SAndreas Gruenbacher 	int digest_size, err, expect;
196769a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
196869a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
1969b411b363SPhilipp Reisner 
	/* If data integrity is enabled, the digest precedes the payload. */
197011f8b2b6SAndreas Gruenbacher 	digest_size = 0;
197169a22773SAndreas Gruenbacher 	if (peer_device->connection->peer_integrity_tfm) {
19723d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
197311f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1974a5c31904SAndreas Gruenbacher 		if (err)
1975a5c31904SAndreas Gruenbacher 			return err;
197611f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
197788104ca4SAndreas Gruenbacher 	}
1978b411b363SPhilipp Reisner 
1979b411b363SPhilipp Reisner 	/* optimistically update recv_cnt.  if receiving fails below,
1980b411b363SPhilipp Reisner 	 * we disconnect anyways, and counters will be reset. */
198169a22773SAndreas Gruenbacher 	peer_device->device->recv_cnt += data_size>>9;
1982b411b363SPhilipp Reisner 
1983b411b363SPhilipp Reisner 	bio = req->master_bio;
198469a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
1985b411b363SPhilipp Reisner 
	/* Fill each bio segment directly from the socket. */
19867988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
19873eddaa60SChristoph Hellwig 		void *mapped = bvec_kmap_local(&bvec);
19887988613bSKent Overstreet 		expect = min_t(int, data_size, bvec.bv_len);
198969a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
19903eddaa60SChristoph Hellwig 		kunmap_local(mapped);
1991a5c31904SAndreas Gruenbacher 		if (err)
1992a5c31904SAndreas Gruenbacher 			return err;
1993a5c31904SAndreas Gruenbacher 		data_size -= expect;
1994b411b363SPhilipp Reisner 	}
1995b411b363SPhilipp Reisner 
	/* Verify the received data against the digest, if any. */
199611f8b2b6SAndreas Gruenbacher 	if (digest_size) {
199769a22773SAndreas Gruenbacher 		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
199811f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
199969a22773SAndreas Gruenbacher 			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
200028284cefSAndreas Gruenbacher 			return -EINVAL;
2001b411b363SPhilipp Reisner 		}
2002b411b363SPhilipp Reisner 	}
2003b411b363SPhilipp Reisner 
200469a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, data_size == 0);
200528284cefSAndreas Gruenbacher 	return 0;
2006b411b363SPhilipp Reisner }
2007b411b363SPhilipp Reisner 
2008a990be46SAndreas Gruenbacher /*
2009668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
2010a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
2011a990be46SAndreas Gruenbacher  *
 * Completion work for a resync write: on success mark the range in sync
 * and ack with P_RS_WRITE_ACK; on I/O error record the failed resync
 * range and send P_NEG_ACK.  Balances the inc_unacked() done in
 * recv_resync_read().
2011a990be46SAndreas Gruenbacher  */
201299920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
2013b411b363SPhilipp Reisner {
20148050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2015a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2016a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2017a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2018db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
201999920dc5SAndreas Gruenbacher 	int err;
2020b411b363SPhilipp Reisner 
20210b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2022b411b363SPhilipp Reisner 
2023db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2024b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, peer_req->i.size);
2025a8cd15baSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
2026b411b363SPhilipp Reisner 	} else {
2027b411b363SPhilipp Reisner 		/* Record failure to sync */
2028b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, peer_req->i.size);
2029b411b363SPhilipp Reisner 
2030a8cd15baSAndreas Gruenbacher 		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2031b411b363SPhilipp Reisner 	}
2032b30ab791SAndreas Gruenbacher 	dec_unacked(device);
2033b411b363SPhilipp Reisner 
203499920dc5SAndreas Gruenbacher 	return err;
2035b411b363SPhilipp Reisner }
2036b411b363SPhilipp Reisner 
/* Read one resync data block from the socket into a peer request and
 * submit it as a write to the local disk.  On success the ack is sent
 * later from e_end_resync_block(); on failure we unhook the request,
 * drop the local-disk reference taken by the caller, and return -EIO so
 * the connection gets torn down. */
203769a22773SAndreas Gruenbacher static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
2038a0fb3c47SLars Ellenberg 			    struct packet_info *pi) __releases(local)
2039b411b363SPhilipp Reisner {
204069a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2041db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2042b411b363SPhilipp Reisner 
2043a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
2044db830c46SAndreas Gruenbacher 	if (!peer_req)
204545bb912bSLars Ellenberg 		goto fail;
2046b411b363SPhilipp Reisner 
2047b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
2048b411b363SPhilipp Reisner 
2049b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2050b411b363SPhilipp Reisner 	/* corresponding dec_unacked() in e_end_resync_block()
2051b411b363SPhilipp Reisner 	 * respective _drbd_clear_done_ee */
2052b411b363SPhilipp Reisner 
2053a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_resync_block;
205421ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
205545bb912bSLars Ellenberg 
	/* Queue on sync_ee before submitting, so completion can find it. */
20560500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2057b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->sync_ee);
20580500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2059b411b363SPhilipp Reisner 
2060a0fb3c47SLars Ellenberg 	atomic_add(pi->size >> 9, &device->rs_sect_ev);
206186563de8SBart Van Assche 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE,
2062bb3cc85eSMike Christie 				     DRBD_FAULT_RS_WR) == 0)
2063e1c1b0fcSAndreas Gruenbacher 		return 0;
206445bb912bSLars Ellenberg 
206510f6d992SLars Ellenberg 	/* don't care for the reason here */
2066d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n")&#59;
20670500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2068a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
20690500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
207022cc37a9SLars Ellenberg 
2071b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
207245bb912bSLars Ellenberg fail:
2073b30ab791SAndreas Gruenbacher 	put_ldev(device);
2074e1c1b0fcSAndreas Gruenbacher 	return -EIO;
2075b411b363SPhilipp Reisner }
2076b411b363SPhilipp Reisner 
/* Map a block_id received from the peer back to our request object.
 * The id is the pointer value of the request we sent out; before it is
 * trusted, verify that interval tree 'root' really contains that
 * request's interval at 'sector' and that it is a local request.
 * Returns NULL when not found, logging unless missing_ok. */
2077668eebc6SAndreas Gruenbacher static struct drbd_request *
2078b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2079bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2080b411b363SPhilipp Reisner {
2081b411b363SPhilipp Reisner 	struct drbd_request *req;
2082668eebc6SAndreas Gruenbacher 
2083bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2084bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
20855e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2086668eebc6SAndreas Gruenbacher 		return req;
2087c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2088d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2089c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2090c3afd8f5SAndreas Gruenbacher 	}
2091668eebc6SAndreas Gruenbacher 	return NULL;
2092668eebc6SAndreas Gruenbacher }
2093668eebc6SAndreas Gruenbacher 
/* Handle P_DATA_REPLY: locate the read request identified by the peer's
 * block_id, copy the payload into its master bio (recv_dless_read) and
 * advance the request state machine with DATA_RECEIVED. */
2094bde89a9eSAndreas Gruenbacher static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
2095b411b363SPhilipp Reisner {
20969f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2097b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2098b411b363SPhilipp Reisner 	struct drbd_request *req;
2099b411b363SPhilipp Reisner 	sector_t sector;
210082bc0194SAndreas Gruenbacher 	int err;
2101e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21024a76b161SAndreas Gruenbacher 
21039f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21049f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21054a76b161SAndreas Gruenbacher 		return -EIO;
21069f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2107b411b363SPhilipp Reisner 
2108b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2109b411b363SPhilipp Reisner 
21100500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2111b30ab791SAndreas Gruenbacher 	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
21120500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2113c3afd8f5SAndreas Gruenbacher 	if (unlikely(!req))
211482bc0194SAndreas Gruenbacher 		return -EIO;
2115b411b363SPhilipp Reisner 
211669a22773SAndreas Gruenbacher 	err = recv_dless_read(peer_device, req, sector, pi->size);
211782bc0194SAndreas Gruenbacher 	if (!err)
21188554df1cSAndreas Gruenbacher 		req_mod(req, DATA_RECEIVED);
2119b411b363SPhilipp Reisner 	/* else: nothing. handled from drbd_disconnect...
2120b411b363SPhilipp Reisner 	 * I don't think we may complete this just yet
2121b411b363SPhilipp Reisner 	 * in case we are "on-disconnect: freeze" */
2122b411b363SPhilipp Reisner 
212382bc0194SAndreas Gruenbacher 	return err;
2124b411b363SPhilipp Reisner }
2125b411b363SPhilipp Reisner 
/* Handle P_RS_DATA_REPLY: if we have a local disk, write the resync
 * block via recv_resync_read(); otherwise drain the payload from the
 * socket and negatively ack it.  Received sectors are accounted in
 * rs_sect_in either way. */
2126bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
2127b411b363SPhilipp Reisner {
21289f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2129b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2130b411b363SPhilipp Reisner 	sector_t sector;
213182bc0194SAndreas Gruenbacher 	int err;
2132e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21334a76b161SAndreas Gruenbacher 
21349f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21359f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21364a76b161SAndreas Gruenbacher 		return -EIO;
21379f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2138b411b363SPhilipp Reisner 
2139b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
21400b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, p->block_id == ID_SYNCER);
2141b411b363SPhilipp Reisner 
2142b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
2143b411b363SPhilipp Reisner 		/* data is submitted to disk within recv_resync_read.
2144b411b363SPhilipp Reisner 		 * corresponding put_ldev done below on error,
2145fcefa62eSAndreas Gruenbacher 		 * or in drbd_peer_request_endio. */
2146a0fb3c47SLars Ellenberg 		err = recv_resync_read(peer_device, sector, pi);
2147b411b363SPhilipp Reisner 	} else {
2148b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
2149d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not write resync data to local disk.\n");
2150b411b363SPhilipp Reisner 
	/* No disk: consume the payload so the stream stays in sync. */
215169a22773SAndreas Gruenbacher 		err = drbd_drain_block(peer_device, pi->size);
2152b411b363SPhilipp Reisner 
215369a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2154b411b363SPhilipp Reisner 	}
2155b411b363SPhilipp Reisner 
2156b30ab791SAndreas Gruenbacher 	atomic_add(pi->size >> 9, &device->rs_sect_in);
2157778f271dSPhilipp Reisner 
215882bc0194SAndreas Gruenbacher 	return err;
2159b411b363SPhilipp Reisner }
2160b411b363SPhilipp Reisner 
/* Re-queue our own postponed local writes that overlap the range
 * [sector, sector+size) now that the conflicting peer write is done.
 * CONFLICT_RESOLVED moves each such request onto the retry workqueue
 * (see comment below).  Caller holds device->resource->req_lock. */
2161b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device,
21627be8da07SAndreas Gruenbacher 				       sector_t sector, int size)
2163b411b363SPhilipp Reisner {
21647be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
21657be8da07SAndreas Gruenbacher 	struct drbd_request *req;
2166b411b363SPhilipp Reisner 
2167b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
21687be8da07SAndreas Gruenbacher 		if (!i->local)
21697be8da07SAndreas Gruenbacher 			continue;
21707be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
21717be8da07SAndreas Gruenbacher 		if (req->rq_state & RQ_LOCAL_PENDING ||
21727be8da07SAndreas Gruenbacher 		    !(req->rq_state & RQ_POSTPONED))
21737be8da07SAndreas Gruenbacher 			continue;
21742312f0b3SLars Ellenberg 		/* as it is RQ_POSTPONED, this will cause it to
21752312f0b3SLars Ellenberg 		 * be queued on the retry workqueue. */
2176d4dabbe2SLars Ellenberg 		__req_mod(req, CONFLICT_RESOLVED, NULL);
21777be8da07SAndreas Gruenbacher 	}
21787be8da07SAndreas Gruenbacher }
21797be8da07SAndreas Gruenbacher 
2180a990be46SAndreas Gruenbacher /*
2181668700b4SPhilipp Reisner  * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion work for a peer write: send the appropriate (neg-)ack if
 * one is owed, then remove the request from conflict detection and
 * restart any of our postponed conflicting writes, and finally put our
 * reference on the epoch.
2182b411b363SPhilipp Reisner  */
218399920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *w, int cancel)
2184b411b363SPhilipp Reisner {
21858050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2186a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2187a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2188a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2189db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
219099920dc5SAndreas Gruenbacher 	int err = 0, pcmd;
2191b411b363SPhilipp Reisner 
2192303d1448SPhilipp Reisner 	if (peer_req->flags & EE_SEND_WRITE_ACK) {
2193db830c46SAndreas Gruenbacher 		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
	/* During resync a successful write may also count as "in sync". */
2194b30ab791SAndreas Gruenbacher 			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
2195b30ab791SAndreas Gruenbacher 				device->state.conn <= C_PAUSED_SYNC_T &&
2196db830c46SAndreas Gruenbacher 				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
2197b411b363SPhilipp Reisner 				P_RS_WRITE_ACK : P_WRITE_ACK;
2198a8cd15baSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, pcmd, peer_req);
2199b411b363SPhilipp Reisner 			if (pcmd == P_RS_WRITE_ACK)
2200b30ab791SAndreas Gruenbacher 				drbd_set_in_sync(device, sector, peer_req->i.size);
2201b411b363SPhilipp Reisner 		} else {
2202a8cd15baSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2203b411b363SPhilipp Reisner 			/* we expect it to be marked out of sync anyways...
2204b411b363SPhilipp Reisner 			 * maybe assert this?  */
2205b411b363SPhilipp Reisner 		}
2206b30ab791SAndreas Gruenbacher 		dec_unacked(device);
2207b411b363SPhilipp Reisner 	}
220808d0dabfSLars Ellenberg 
2209b411b363SPhilipp Reisner 	/* we delete from the conflict detection hash _after_ we sent out the
2210b411b363SPhilipp Reisner 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
2211302bdeaeSPhilipp Reisner 	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
22120500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
22130b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
2214b30ab791SAndreas Gruenbacher 		drbd_remove_epoch_entry_interval(device, peer_req);
22157be8da07SAndreas Gruenbacher 		if (peer_req->flags & EE_RESTART_REQUESTS)
2216b30ab791SAndreas Gruenbacher 			restart_conflicting_writes(device, sector, peer_req->i.size);
22170500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
2218bb3bfe96SAndreas Gruenbacher 	} else
22190b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2220b411b363SPhilipp Reisner 
22215dd2ca19SAndreas Gruenbacher 	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
2222b411b363SPhilipp Reisner 
222399920dc5SAndreas Gruenbacher 	return err;
2224b411b363SPhilipp Reisner }
2225b411b363SPhilipp Reisner 
/* Work helper: send the given ack packet for this peer request and
 * drop the corresponding unacked count. */
2226a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2227b411b363SPhilipp Reisner {
22288050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2229a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2230a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
223199920dc5SAndreas Gruenbacher 	int err;
2232b411b363SPhilipp Reisner 
2233a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2234a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2235b411b363SPhilipp Reisner 
223699920dc5SAndreas Gruenbacher 	return err;
2237b411b363SPhilipp Reisner }
2238b411b363SPhilipp Reisner 
2239d4dabbe2SLars Ellenberg static int e_send_superseded(struct drbd_work *w, int unused)
2240b6a370baSPhilipp Reisner {
2241a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, P_SUPERSEDED);
22427be8da07SAndreas Gruenbacher }
2243b6a370baSPhilipp Reisner 
224499920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22457be8da07SAndreas Gruenbacher {
2246a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2247a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2248a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
22497be8da07SAndreas Gruenbacher 
2250a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2251d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
22527be8da07SAndreas Gruenbacher }
22537be8da07SAndreas Gruenbacher 
22543e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
22553e394da1SAndreas Gruenbacher {
22563e394da1SAndreas Gruenbacher 	/*
22573e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
22583e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
22593e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
22603e394da1SAndreas Gruenbacher 	 */
22613e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
22623e394da1SAndreas Gruenbacher }
22633e394da1SAndreas Gruenbacher 
22643e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
22653e394da1SAndreas Gruenbacher {
22663e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
22673e394da1SAndreas Gruenbacher }
22683e394da1SAndreas Gruenbacher 
226969a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
22703e394da1SAndreas Gruenbacher {
227169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
22723c13b680SLars Ellenberg 	unsigned int newest_peer_seq;
22733e394da1SAndreas Gruenbacher 
227469a22773SAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2275b30ab791SAndreas Gruenbacher 		spin_lock(&device->peer_seq_lock);
2276b30ab791SAndreas Gruenbacher 		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2277b30ab791SAndreas Gruenbacher 		device->peer_seq = newest_peer_seq;
2278b30ab791SAndreas Gruenbacher 		spin_unlock(&device->peer_seq_lock);
2279b30ab791SAndreas Gruenbacher 		/* wake up only if we actually changed device->peer_seq */
22803c13b680SLars Ellenberg 		if (peer_seq == newest_peer_seq)
2281b30ab791SAndreas Gruenbacher 			wake_up(&device->seq_wait);
22823e394da1SAndreas Gruenbacher 	}
22837be8da07SAndreas Gruenbacher }
22843e394da1SAndreas Gruenbacher 
2285d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2286d93f6302SLars Ellenberg {
2287d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2288d93f6302SLars Ellenberg }
2289d93f6302SLars Ellenberg 
2290d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
2291b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2292d93f6302SLars Ellenberg {
2293d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
22947e5fec31SFabian Frederick 	bool rv = false;
2295b6a370baSPhilipp Reisner 
22960500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2297a8cd15baSAndreas Gruenbacher 	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2298d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
2299d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
23007e5fec31SFabian Frederick 			rv = true;
2301b6a370baSPhilipp Reisner 			break;
2302b6a370baSPhilipp Reisner 		}
2303b6a370baSPhilipp Reisner 	}
23040500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2305b6a370baSPhilipp Reisner 
2306b6a370baSPhilipp Reisner 	return rv;
2307b6a370baSPhilipp Reisner }
2308b6a370baSPhilipp Reisner 
2309b411b363SPhilipp Reisner /* Called from receive_Data.
2310b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2311b411b363SPhilipp Reisner  *
2312b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2313b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2314b411b363SPhilipp Reisner  * been sent.
2315b411b363SPhilipp Reisner  *
2316b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2317b411b363SPhilipp Reisner  *
2318b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2319b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2320b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2321b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2322b411b363SPhilipp Reisner  *
2323b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2324b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2325b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2326b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2327b411b363SPhilipp Reisner  *
2328b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2329b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
233069a22773SAndreas Gruenbacher static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
2331b411b363SPhilipp Reisner {
233269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2333b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
2334b411b363SPhilipp Reisner 	long timeout;
2335b874d231SPhilipp Reisner 	int ret = 0, tp;
23367be8da07SAndreas Gruenbacher 
233769a22773SAndreas Gruenbacher 	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
23387be8da07SAndreas Gruenbacher 		return 0;
23397be8da07SAndreas Gruenbacher 
2340b30ab791SAndreas Gruenbacher 	spin_lock(&device->peer_seq_lock);
2341b411b363SPhilipp Reisner 	for (;;) {
2342b30ab791SAndreas Gruenbacher 		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2343b30ab791SAndreas Gruenbacher 			device->peer_seq = seq_max(device->peer_seq, peer_seq);
2344b411b363SPhilipp Reisner 			break;
23457be8da07SAndreas Gruenbacher 		}
2346b874d231SPhilipp Reisner 
2347b411b363SPhilipp Reisner 		if (signal_pending(current)) {
2348b411b363SPhilipp Reisner 			ret = -ERESTARTSYS;
2349b411b363SPhilipp Reisner 			break;
2350b411b363SPhilipp Reisner 		}
2351b874d231SPhilipp Reisner 
2352b874d231SPhilipp Reisner 		rcu_read_lock();
23535dd2ca19SAndreas Gruenbacher 		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
2354b874d231SPhilipp Reisner 		rcu_read_unlock();
2355b874d231SPhilipp Reisner 
2356b874d231SPhilipp Reisner 		if (!tp)
2357b874d231SPhilipp Reisner 			break;
2358b874d231SPhilipp Reisner 
2359b874d231SPhilipp Reisner 		/* Only need to wait if two_primaries is enabled */
2360b30ab791SAndreas Gruenbacher 		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2361b30ab791SAndreas Gruenbacher 		spin_unlock(&device->peer_seq_lock);
236244ed167dSPhilipp Reisner 		rcu_read_lock();
236369a22773SAndreas Gruenbacher 		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
236444ed167dSPhilipp Reisner 		rcu_read_unlock();
236571b1c1ebSAndreas Gruenbacher 		timeout = schedule_timeout(timeout);
2366b30ab791SAndreas Gruenbacher 		spin_lock(&device->peer_seq_lock);
23677be8da07SAndreas Gruenbacher 		if (!timeout) {
2368b411b363SPhilipp Reisner 			ret = -ETIMEDOUT;
2369d0180171SAndreas Gruenbacher 			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
2370b411b363SPhilipp Reisner 			break;
2371b411b363SPhilipp Reisner 		}
2372b411b363SPhilipp Reisner 	}
2373b30ab791SAndreas Gruenbacher 	spin_unlock(&device->peer_seq_lock);
2374b30ab791SAndreas Gruenbacher 	finish_wait(&device->seq_wait, &wait);
2375b411b363SPhilipp Reisner 	return ret;
2376b411b363SPhilipp Reisner }
2377b411b363SPhilipp Reisner 
2378688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2379688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2380688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
23819945172aSBart Van Assche static blk_opf_t wire_flags_to_bio_flags(u32 dpf)
238276d2e7ecSPhilipp Reisner {
238376d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
238476d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
238528a8f0d3SMike Christie 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2386bb3cc85eSMike Christie }
2387bb3cc85eSMike Christie 
23889945172aSBart Van Assche static enum req_op wire_flags_to_bio_op(u32 dpf)
2389bb3cc85eSMike Christie {
2390f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
239145c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2392f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2393f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2394bb3cc85eSMike Christie 	else
2395bb3cc85eSMike Christie 		return REQ_OP_WRITE;
239676d2e7ecSPhilipp Reisner }
239776d2e7ecSPhilipp Reisner 
2398b30ab791SAndreas Gruenbacher static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
23997be8da07SAndreas Gruenbacher 				    unsigned int size)
2400b411b363SPhilipp Reisner {
24017be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
24027be8da07SAndreas Gruenbacher 
24037be8da07SAndreas Gruenbacher     repeat:
2404b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
24057be8da07SAndreas Gruenbacher 		struct drbd_request *req;
24067be8da07SAndreas Gruenbacher 		struct bio_and_error m;
24077be8da07SAndreas Gruenbacher 
24087be8da07SAndreas Gruenbacher 		if (!i->local)
24097be8da07SAndreas Gruenbacher 			continue;
24107be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
24117be8da07SAndreas Gruenbacher 		if (!(req->rq_state & RQ_POSTPONED))
24127be8da07SAndreas Gruenbacher 			continue;
24137be8da07SAndreas Gruenbacher 		req->rq_state &= ~RQ_POSTPONED;
24147be8da07SAndreas Gruenbacher 		__req_mod(req, NEG_ACKED, &m);
24150500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
24167be8da07SAndreas Gruenbacher 		if (m.bio)
2417b30ab791SAndreas Gruenbacher 			complete_master_bio(device, &m);
24180500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
24197be8da07SAndreas Gruenbacher 		goto repeat;
24207be8da07SAndreas Gruenbacher 	}
24217be8da07SAndreas Gruenbacher }
24227be8da07SAndreas Gruenbacher 
2423b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device,
24247be8da07SAndreas Gruenbacher 				  struct drbd_peer_request *peer_req)
24257be8da07SAndreas Gruenbacher {
2426e33b32deSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
2427bde89a9eSAndreas Gruenbacher 	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
24287be8da07SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
24297be8da07SAndreas Gruenbacher 	const unsigned int size = peer_req->i.size;
24307be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
24317be8da07SAndreas Gruenbacher 	bool equal;
24327be8da07SAndreas Gruenbacher 	int err;
24337be8da07SAndreas Gruenbacher 
24347be8da07SAndreas Gruenbacher 	/*
24357be8da07SAndreas Gruenbacher 	 * Inserting the peer request into the write_requests tree will prevent
24367be8da07SAndreas Gruenbacher 	 * new conflicting local requests from being added.
24377be8da07SAndreas Gruenbacher 	 */
2438b30ab791SAndreas Gruenbacher 	drbd_insert_interval(&device->write_requests, &peer_req->i);
24397be8da07SAndreas Gruenbacher 
24407be8da07SAndreas Gruenbacher     repeat:
2441b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
24427be8da07SAndreas Gruenbacher 		if (i == &peer_req->i)
24437be8da07SAndreas Gruenbacher 			continue;
244408d0dabfSLars Ellenberg 		if (i->completed)
244508d0dabfSLars Ellenberg 			continue;
24467be8da07SAndreas Gruenbacher 
24477be8da07SAndreas Gruenbacher 		if (!i->local) {
24487be8da07SAndreas Gruenbacher 			/*
24497be8da07SAndreas Gruenbacher 			 * Our peer has sent a conflicting remote request; this
24507be8da07SAndreas Gruenbacher 			 * should not happen in a two-node setup.  Wait for the
24517be8da07SAndreas Gruenbacher 			 * earlier peer request to complete.
24527be8da07SAndreas Gruenbacher 			 */
2453b30ab791SAndreas Gruenbacher 			err = drbd_wait_misc(device, i);
24547be8da07SAndreas Gruenbacher 			if (err)
24557be8da07SAndreas Gruenbacher 				goto out;
24567be8da07SAndreas Gruenbacher 			goto repeat;
24577be8da07SAndreas Gruenbacher 		}
24587be8da07SAndreas Gruenbacher 
24597be8da07SAndreas Gruenbacher 		equal = i->sector == sector && i->size == size;
24607be8da07SAndreas Gruenbacher 		if (resolve_conflicts) {
24617be8da07SAndreas Gruenbacher 			/*
24627be8da07SAndreas Gruenbacher 			 * If the peer request is fully contained within the
2463d4dabbe2SLars Ellenberg 			 * overlapping request, it can be considered overwritten
2464d4dabbe2SLars Ellenberg 			 * and thus superseded; otherwise, it will be retried
2465d4dabbe2SLars Ellenberg 			 * once all overlapping requests have completed.
24667be8da07SAndreas Gruenbacher 			 */
2467d4dabbe2SLars Ellenberg 			bool superseded = i->sector <= sector && i->sector +
24687be8da07SAndreas Gruenbacher 				       (i->size >> 9) >= sector + (size >> 9);
24697be8da07SAndreas Gruenbacher 
24707be8da07SAndreas Gruenbacher 			if (!equal)
2471d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
24727be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u, "
24737be8da07SAndreas Gruenbacher 					       "assuming %s came first\n",
24747be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
24757be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size,
2476d4dabbe2SLars Ellenberg 					  superseded ? "local" : "remote");
24777be8da07SAndreas Gruenbacher 
2478a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = superseded ? e_send_superseded :
24797be8da07SAndreas Gruenbacher 						   e_send_retry_write;
2480a8cd15baSAndreas Gruenbacher 			list_add_tail(&peer_req->w.list, &device->done_ee);
2481668700b4SPhilipp Reisner 			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
24827be8da07SAndreas Gruenbacher 
24837be8da07SAndreas Gruenbacher 			err = -ENOENT;
24847be8da07SAndreas Gruenbacher 			goto out;
24857be8da07SAndreas Gruenbacher 		} else {
24867be8da07SAndreas Gruenbacher 			struct drbd_request *req =
24877be8da07SAndreas Gruenbacher 				container_of(i, struct drbd_request, i);
24887be8da07SAndreas Gruenbacher 
24897be8da07SAndreas Gruenbacher 			if (!equal)
2490d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
24917be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u\n",
24927be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
24937be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size);
24947be8da07SAndreas Gruenbacher 
24957be8da07SAndreas Gruenbacher 			if (req->rq_state & RQ_LOCAL_PENDING ||
24967be8da07SAndreas Gruenbacher 			    !(req->rq_state & RQ_POSTPONED)) {
24977be8da07SAndreas Gruenbacher 				/*
24987be8da07SAndreas Gruenbacher 				 * Wait for the node with the discard flag to
2499d4dabbe2SLars Ellenberg 				 * decide if this request has been superseded
2500d4dabbe2SLars Ellenberg 				 * or needs to be retried.
2501d4dabbe2SLars Ellenberg 				 * Requests that have been superseded will
25027be8da07SAndreas Gruenbacher 				 * disappear from the write_requests tree.
25037be8da07SAndreas Gruenbacher 				 *
25047be8da07SAndreas Gruenbacher 				 * In addition, wait for the conflicting
25057be8da07SAndreas Gruenbacher 				 * request to finish locally before submitting
25067be8da07SAndreas Gruenbacher 				 * the conflicting peer request.
25077be8da07SAndreas Gruenbacher 				 */
2508b30ab791SAndreas Gruenbacher 				err = drbd_wait_misc(device, &req->i);
25097be8da07SAndreas Gruenbacher 				if (err) {
2510e33b32deSAndreas Gruenbacher 					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
2511b30ab791SAndreas Gruenbacher 					fail_postponed_requests(device, sector, size);
25127be8da07SAndreas Gruenbacher 					goto out;
25137be8da07SAndreas Gruenbacher 				}
25147be8da07SAndreas Gruenbacher 				goto repeat;
25157be8da07SAndreas Gruenbacher 			}
25167be8da07SAndreas Gruenbacher 			/*
25177be8da07SAndreas Gruenbacher 			 * Remember to restart the conflicting requests after
25187be8da07SAndreas Gruenbacher 			 * the new peer request has completed.
25197be8da07SAndreas Gruenbacher 			 */
25207be8da07SAndreas Gruenbacher 			peer_req->flags |= EE_RESTART_REQUESTS;
25217be8da07SAndreas Gruenbacher 		}
25227be8da07SAndreas Gruenbacher 	}
25237be8da07SAndreas Gruenbacher 	err = 0;
25247be8da07SAndreas Gruenbacher 
25257be8da07SAndreas Gruenbacher     out:
25267be8da07SAndreas Gruenbacher 	if (err)
2527b30ab791SAndreas Gruenbacher 		drbd_remove_epoch_entry_interval(device, peer_req);
25287be8da07SAndreas Gruenbacher 	return err;
25297be8da07SAndreas Gruenbacher }
25307be8da07SAndreas Gruenbacher 
2531b411b363SPhilipp Reisner /* mirrored write */
2532bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
2533b411b363SPhilipp Reisner {
25349f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2535b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
253621ae5d7fSLars Ellenberg 	struct net_conf *nc;
2537b411b363SPhilipp Reisner 	sector_t sector;
2538db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2539e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
25407be8da07SAndreas Gruenbacher 	u32 peer_seq = be32_to_cpu(p->seq_num);
25419945172aSBart Van Assche 	enum req_op op;
25429945172aSBart Van Assche 	blk_opf_t op_flags;
2543b411b363SPhilipp Reisner 	u32 dp_flags;
2544302bdeaeSPhilipp Reisner 	int err, tp;
25457be8da07SAndreas Gruenbacher 
25469f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
25479f4fe9adSAndreas Gruenbacher 	if (!peer_device)
25484a76b161SAndreas Gruenbacher 		return -EIO;
25499f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2550b411b363SPhilipp Reisner 
2551b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
255282bc0194SAndreas Gruenbacher 		int err2;
2553b411b363SPhilipp Reisner 
255469a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
255569a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2556bde89a9eSAndreas Gruenbacher 		atomic_inc(&connection->current_epoch->epoch_size);
255769a22773SAndreas Gruenbacher 		err2 = drbd_drain_block(peer_device, pi->size);
255882bc0194SAndreas Gruenbacher 		if (!err)
255982bc0194SAndreas Gruenbacher 			err = err2;
256082bc0194SAndreas Gruenbacher 		return err;
2561b411b363SPhilipp Reisner 	}
2562b411b363SPhilipp Reisner 
2563fcefa62eSAndreas Gruenbacher 	/*
2564fcefa62eSAndreas Gruenbacher 	 * Corresponding put_ldev done either below (on various errors), or in
2565fcefa62eSAndreas Gruenbacher 	 * drbd_peer_request_endio, if we successfully submit the data at the
2566fcefa62eSAndreas Gruenbacher 	 * end of this function.
2567fcefa62eSAndreas Gruenbacher 	 */
2568b411b363SPhilipp Reisner 
2569b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2570a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
2571db830c46SAndreas Gruenbacher 	if (!peer_req) {
2572b30ab791SAndreas Gruenbacher 		put_ldev(device);
257382bc0194SAndreas Gruenbacher 		return -EIO;
2574b411b363SPhilipp Reisner 	}
2575b411b363SPhilipp Reisner 
2576a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_block;
257721ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
257821ae5d7fSLars Ellenberg 	peer_req->flags |= EE_APPLICATION;
2579b411b363SPhilipp Reisner 
2580688593c5SLars Ellenberg 	dp_flags = be32_to_cpu(p->dp_flags);
2581bb3cc85eSMike Christie 	op = wire_flags_to_bio_op(dp_flags);
2582bb3cc85eSMike Christie 	op_flags = wire_flags_to_bio_flags(dp_flags);
2583a0fb3c47SLars Ellenberg 	if (pi->cmd == P_TRIM) {
2584a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->i.size > 0);
2585f31e583aSLars Ellenberg 		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
2586f31e583aSLars Ellenberg 		D_ASSERT(peer_device, peer_req->pages == NULL);
2587f31e583aSLars Ellenberg 		/* need to play safe: an older DRBD sender
2588f31e583aSLars Ellenberg 		 * may mean zero-out while sending P_TRIM. */
2589f31e583aSLars Ellenberg 		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
2590f31e583aSLars Ellenberg 			peer_req->flags |= EE_ZEROOUT;
2591f31e583aSLars Ellenberg 	} else if (pi->cmd == P_ZEROES) {
2592f31e583aSLars Ellenberg 		D_ASSERT(peer_device, peer_req->i.size > 0);
259345c21793SChristoph Hellwig 		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
2594a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->pages == NULL);
2595f31e583aSLars Ellenberg 		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
2596f31e583aSLars Ellenberg 		if (dp_flags & DP_DISCARD)
2597f31e583aSLars Ellenberg 			peer_req->flags |= EE_TRIM;
2598a0fb3c47SLars Ellenberg 	} else if (peer_req->pages == NULL) {
25990b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, peer_req->i.size == 0);
26000b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, dp_flags & DP_FLUSH);
2601a73ff323SLars Ellenberg 	}
2602688593c5SLars Ellenberg 
2603688593c5SLars Ellenberg 	if (dp_flags & DP_MAY_SET_IN_SYNC)
2604db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_MAY_SET_IN_SYNC;
2605688593c5SLars Ellenberg 
2606bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
2607bde89a9eSAndreas Gruenbacher 	peer_req->epoch = connection->current_epoch;
2608db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->epoch_size);
2609db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->active);
2610bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
2611b411b363SPhilipp Reisner 
2612302bdeaeSPhilipp Reisner 	rcu_read_lock();
261321ae5d7fSLars Ellenberg 	nc = rcu_dereference(peer_device->connection->net_conf);
261421ae5d7fSLars Ellenberg 	tp = nc->two_primaries;
261521ae5d7fSLars Ellenberg 	if (peer_device->connection->agreed_pro_version < 100) {
261621ae5d7fSLars Ellenberg 		switch (nc->wire_protocol) {
261721ae5d7fSLars Ellenberg 		case DRBD_PROT_C:
261821ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_WRITE_ACK;
261921ae5d7fSLars Ellenberg 			break;
262021ae5d7fSLars Ellenberg 		case DRBD_PROT_B:
262121ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_RECEIVE_ACK;
262221ae5d7fSLars Ellenberg 			break;
262321ae5d7fSLars Ellenberg 		}
262421ae5d7fSLars Ellenberg 	}
2625302bdeaeSPhilipp Reisner 	rcu_read_unlock();
262621ae5d7fSLars Ellenberg 
262721ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_WRITE_ACK) {
262821ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SEND_WRITE_ACK;
262921ae5d7fSLars Ellenberg 		inc_unacked(device);
263021ae5d7fSLars Ellenberg 		/* corresponding dec_unacked() in e_end_block()
263121ae5d7fSLars Ellenberg 		 * respective _drbd_clear_done_ee */
263221ae5d7fSLars Ellenberg 	}
263321ae5d7fSLars Ellenberg 
263421ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_RECEIVE_ACK) {
263521ae5d7fSLars Ellenberg 		/* I really don't like it that the receiver thread
263621ae5d7fSLars Ellenberg 		 * sends on the msock, but anyways */
26375dd2ca19SAndreas Gruenbacher 		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
263821ae5d7fSLars Ellenberg 	}
263921ae5d7fSLars Ellenberg 
2640302bdeaeSPhilipp Reisner 	if (tp) {
264121ae5d7fSLars Ellenberg 		/* two primaries implies protocol C */
264221ae5d7fSLars Ellenberg 		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
2643302bdeaeSPhilipp Reisner 		peer_req->flags |= EE_IN_INTERVAL_TREE;
264469a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
26457be8da07SAndreas Gruenbacher 		if (err)
2646b411b363SPhilipp Reisner 			goto out_interrupted;
26470500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
2648b30ab791SAndreas Gruenbacher 		err = handle_write_conflicts(device, peer_req);
26497be8da07SAndreas Gruenbacher 		if (err) {
26500500813fSAndreas Gruenbacher 			spin_unlock_irq(&device->resource->req_lock);
26517be8da07SAndreas Gruenbacher 			if (err == -ENOENT) {
2652b30ab791SAndreas Gruenbacher 				put_ldev(device);
265382bc0194SAndreas Gruenbacher 				return 0;
2654b411b363SPhilipp Reisner 			}
2655b411b363SPhilipp Reisner 			goto out_interrupted;
2656b411b363SPhilipp Reisner 		}
2657b874d231SPhilipp Reisner 	} else {
265869a22773SAndreas Gruenbacher 		update_peer_seq(peer_device, peer_seq);
26590500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
2660b874d231SPhilipp Reisner 	}
2661a34592ffSChristoph Hellwig 	/* TRIM and is processed synchronously,
26629104d31aSLars Ellenberg 	 * we wait for all pending requests, respectively wait for
2663a0fb3c47SLars Ellenberg 	 * active_ee to become empty in drbd_submit_peer_request();
2664a0fb3c47SLars Ellenberg 	 * better not add ourselves here. */
2665a34592ffSChristoph Hellwig 	if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
2666b9ed7080SLars Ellenberg 		list_add_tail(&peer_req->w.list, &device->active_ee);
26670500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2668b411b363SPhilipp Reisner 
2669b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_SYNC_TARGET)
2670b30ab791SAndreas Gruenbacher 		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2671b6a370baSPhilipp Reisner 
2672b30ab791SAndreas Gruenbacher 	if (device->state.pdsk < D_INCONSISTENT) {
2673b411b363SPhilipp Reisner 		/* In case we have the only disk of the cluster, */
2674b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
2675db830c46SAndreas Gruenbacher 		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
26764dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &peer_req->i);
267721ae5d7fSLars Ellenberg 		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2678b411b363SPhilipp Reisner 	}
2679b411b363SPhilipp Reisner 
268086563de8SBart Van Assche 	err = drbd_submit_peer_request(device, peer_req, op | op_flags,
2681bb3cc85eSMike Christie 				       DRBD_FAULT_DT_WR);
268282bc0194SAndreas Gruenbacher 	if (!err)
268382bc0194SAndreas Gruenbacher 		return 0;
2684b411b363SPhilipp Reisner 
268510f6d992SLars Ellenberg 	/* don't care for the reason here */
2686d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
26870500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2688a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
2689b30ab791SAndreas Gruenbacher 	drbd_remove_epoch_entry_interval(device, peer_req);
26900500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
269121ae5d7fSLars Ellenberg 	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
269221ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2693b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &peer_req->i);
269421ae5d7fSLars Ellenberg 	}
269522cc37a9SLars Ellenberg 
2696b411b363SPhilipp Reisner out_interrupted:
26977e5fec31SFabian Frederick 	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
2698b30ab791SAndreas Gruenbacher 	put_ldev(device);
2699b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
270082bc0194SAndreas Gruenbacher 	return err;
2701b411b363SPhilipp Reisner }
2702b411b363SPhilipp Reisner 
27030f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27040f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27050f0601f4SLars Ellenberg  *
27060f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27070f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27080f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27090f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27100f0601f4SLars Ellenberg  *
27110f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27120f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27130f0601f4SLars Ellenberg  */
2714ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2715ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2716e8299874SLars Ellenberg {
2717e8299874SLars Ellenberg 	struct lc_element *tmp;
2718ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2719e8299874SLars Ellenberg 
2720ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2721ad3fee79SLars Ellenberg 		return throttle;
2722e8299874SLars Ellenberg 
2723e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2724e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2725e8299874SLars Ellenberg 	if (tmp) {
2726e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2727e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2728e8299874SLars Ellenberg 			throttle = false;
2729ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2730ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2731e8299874SLars Ellenberg 	}
2732e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2733e8299874SLars Ellenberg 
2734e8299874SLars Ellenberg 	return throttle;
2735e8299874SLars Ellenberg }
2736e8299874SLars Ellenberg 
2737e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27380f0601f4SLars Ellenberg {
27398c40c7c4SChristoph Hellwig 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27400f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2741daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2742e8299874SLars Ellenberg 	int curr_events;
2743daeda1ccSPhilipp Reisner 
2744daeda1ccSPhilipp Reisner 	rcu_read_lock();
2745b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2746daeda1ccSPhilipp Reisner 	rcu_read_unlock();
27470f0601f4SLars Ellenberg 
27480f0601f4SLars Ellenberg 	/* feature disabled? */
2749daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2750e8299874SLars Ellenberg 		return false;
2751e3555d85SPhilipp Reisner 
27528446fe92SChristoph Hellwig 	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2753b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2754ad3fee79SLars Ellenberg 
2755ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2756ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
27570f0601f4SLars Ellenberg 		unsigned long rs_left;
27580f0601f4SLars Ellenberg 		int i;
27590f0601f4SLars Ellenberg 
2760b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
27610f0601f4SLars Ellenberg 
27620f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
27630f0601f4SLars Ellenberg 		 * approx. */
2764b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
27652649f080SLars Ellenberg 
2766b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2767b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
27682649f080SLars Ellenberg 		else
2769b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
27700f0601f4SLars Ellenberg 
2771b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
27720f0601f4SLars Ellenberg 		if (!dt)
27730f0601f4SLars Ellenberg 			dt++;
2774b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
27750f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
27760f0601f4SLars Ellenberg 
2777daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2778e8299874SLars Ellenberg 			return true;
27790f0601f4SLars Ellenberg 	}
2780e8299874SLars Ellenberg 	return false;
27810f0601f4SLars Ellenberg }
27820f0601f4SLars Ellenberg 
/*
 * receive_DataRequest() - handle a read-type request packet from the peer
 * @connection: connection the packet arrived on.
 * @pi: packet info; pi->cmd selects the flavor: application read
 *	(P_DATA_REQUEST), resync reads (P_RS_THIN_REQ, P_RS_DATA_REQUEST),
 *	checksum-based resync (P_CSUM_RS_REQUEST), or online verify
 *	(P_OV_REQUEST, P_OV_REPLY).
 *
 * Validates (sector, size), allocates a peer request, wires up the
 * per-command completion callback, optionally throttles resync traffic,
 * and submits the local read.  Returns 0 on success; a negative error
 * (-EIO, -EINVAL, -ENOMEM) otherwise — the caller treats that as reason
 * to drop the connection (see the "triggering re-connect" message below).
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = get_capacity(device->vdisk);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* reject malformed sizes: non-positive, not 512-byte aligned,
	 * or larger than a single DRBD bio can carry */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* reject reads beyond the end of the device */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local disk: negatively acknowledge per request type,
	 * then drain the payload so the stream stays in sync. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_THIN_REQ:
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Select the completion callback and fault-injection type for this
	 * request flavor; application reads skip resync accounting. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_THIN_REQ:
		/* If at some point in the future we have a smart way to
		   find out if this data block is completely deallocated,
		   then we would do something smarter here than reading
		   the block... */
		peer_req->flags |= EE_RS_THIN_REQ;
		fallthrough;
	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a digest as payload; receive it into a
		 * digest_info allocated right behind the struct. */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First verify request of a run (ov_start_sector still unset)
		 * on a new-enough peer: initialize verify bookkeeping. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	&& drbd_rs_should_slow_down(device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account resync sectors we are about to read locally */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ,
				     fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	/* unwind: unlink from read_ee, drop ldev reference, free request */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2995b411b363SPhilipp Reisner 
/*
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.  Positive return favors the
 * local node's data, negative favors the peer's, -100 means no automatic
 * decision (caller presumably disconnects — confirm against the caller and
 * drbd_uuid_compare()'s legend).
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* lowest bit of the bitmap UUID; used below to tell which node was
	 * primary (NOTE(review): inferred from the younger/older-primary
	 * cases — confirm against the UUID scheme) */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* "changed block" counts: peer's value arrives in the UI_SIZE slot,
	 * ours is comm_bm_set (presumably bits set in our bitmap — verify) */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies are only meaningful with remaining primaries */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		fallthrough;	/* to one of the other strategies */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		fallthrough;
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* neither side changed anything: break the tie via the
			 * RESOLVE_CONFLICTS flag */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* zero-change policy alone cannot decide; others keep going */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		fallthrough;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
3078b411b363SPhilipp Reisner 
30799b48ff07SLee Jones /*
308069a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
308169a22773SAndreas Gruenbacher  */
308269a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3083b411b363SPhilipp Reisner {
308469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
30856184ea21SAndreas Gruenbacher 	int hg, rv = -100;
308644ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3087b411b363SPhilipp Reisner 
308844ed167dSPhilipp Reisner 	rcu_read_lock();
308969a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
309044ed167dSPhilipp Reisner 	rcu_read_unlock();
309144ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3092b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3093b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3094b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3095b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3096b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
309744ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3098d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3099b411b363SPhilipp Reisner 		break;
3100b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3101b411b363SPhilipp Reisner 		break;
3102b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
310369a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3104b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3105b411b363SPhilipp Reisner 			rv = hg;
3106b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3107b411b363SPhilipp Reisner 			rv = hg;
3108b411b363SPhilipp Reisner 		break;
3109b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
311069a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3111b411b363SPhilipp Reisner 		break;
3112b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3113b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3114b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
311569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3116b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3117bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3118bb437946SAndreas Gruenbacher 
3119b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3120b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3121b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3122b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3123bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3124b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3125b411b363SPhilipp Reisner 			} else {
3126d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3127b411b363SPhilipp Reisner 				rv = hg;
3128b411b363SPhilipp Reisner 			}
3129b411b363SPhilipp Reisner 		} else
3130b411b363SPhilipp Reisner 			rv = hg;
3131b411b363SPhilipp Reisner 	}
3132b411b363SPhilipp Reisner 
3133b411b363SPhilipp Reisner 	return rv;
3134b411b363SPhilipp Reisner }
3135b411b363SPhilipp Reisner 
31369b48ff07SLee Jones /*
313769a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
313869a22773SAndreas Gruenbacher  */
313969a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3140b411b363SPhilipp Reisner {
314169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31426184ea21SAndreas Gruenbacher 	int hg, rv = -100;
314344ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3144b411b363SPhilipp Reisner 
314544ed167dSPhilipp Reisner 	rcu_read_lock();
314669a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
314744ed167dSPhilipp Reisner 	rcu_read_unlock();
314844ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3149b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3150b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3151b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3152b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3153b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3154b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3155b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
315644ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3157d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3158b411b363SPhilipp Reisner 		break;
3159b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
316069a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3161b411b363SPhilipp Reisner 		break;
3162b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3163b411b363SPhilipp Reisner 		break;
3164b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
316569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3166b411b363SPhilipp Reisner 		if (hg == -1) {
3167bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3168bb437946SAndreas Gruenbacher 
3169b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3170b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3171b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3172b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3173bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3174b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3175b411b363SPhilipp Reisner 			} else {
3176d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3177b411b363SPhilipp Reisner 				rv = hg;
3178b411b363SPhilipp Reisner 			}
3179b411b363SPhilipp Reisner 		} else
3180b411b363SPhilipp Reisner 			rv = hg;
3181b411b363SPhilipp Reisner 	}
3182b411b363SPhilipp Reisner 
3183b411b363SPhilipp Reisner 	return rv;
3184b411b363SPhilipp Reisner }
3185b411b363SPhilipp Reisner 
3186b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3187b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3188b411b363SPhilipp Reisner {
3189b411b363SPhilipp Reisner 	if (!uuid) {
3190d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3191b411b363SPhilipp Reisner 		return;
3192b411b363SPhilipp Reisner 	}
3193d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3194b411b363SPhilipp Reisner 	     text,
3195b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3196b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3197b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3198b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3199b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3200b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3201b411b363SPhilipp Reisner }
3202b411b363SPhilipp Reisner 
3203b411b363SPhilipp Reisner /*
3204b411b363SPhilipp Reisner   100	after split brain try auto recover
3205b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3206b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3207b411b363SPhilipp Reisner     0	no Sync
3208b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3209b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3210b411b363SPhilipp Reisner  -100	after split brain, disconnect
3211b411b363SPhilipp Reisner -1000	unrelated data
32124a23f264SPhilipp Reisner -1091   requires proto 91
32134a23f264SPhilipp Reisner -1096   requires proto 96
3214b411b363SPhilipp Reisner  */
3215f2d3d75bSLars Ellenberg 
3216f2d3d75bSLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3217b411b363SPhilipp Reisner {
321844a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
321944a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
3220b411b363SPhilipp Reisner 	u64 self, peer;
3221b411b363SPhilipp Reisner 	int i, j;
3222b411b363SPhilipp Reisner 
3223b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3224b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3225b411b363SPhilipp Reisner 
3226b411b363SPhilipp Reisner 	*rule_nr = 10;
3227b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3228b411b363SPhilipp Reisner 		return 0;
3229b411b363SPhilipp Reisner 
3230b411b363SPhilipp Reisner 	*rule_nr = 20;
3231b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3232b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
3233b411b363SPhilipp Reisner 		return -2;
3234b411b363SPhilipp Reisner 
3235b411b363SPhilipp Reisner 	*rule_nr = 30;
3236b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
3237b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
3238b411b363SPhilipp Reisner 		return 2;
3239b411b363SPhilipp Reisner 
3240b411b363SPhilipp Reisner 	if (self == peer) {
3241b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
3242b411b363SPhilipp Reisner 
3243b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3244b411b363SPhilipp Reisner 
324544a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32464a23f264SPhilipp Reisner 				return -1091;
3247b411b363SPhilipp Reisner 
3248b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3249b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3250d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3251b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3252b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3253b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3254b411b363SPhilipp Reisner 
3255b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3256b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3257b411b363SPhilipp Reisner 				*rule_nr = 34;
3258b411b363SPhilipp Reisner 			} else {
3259d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3260b411b363SPhilipp Reisner 				*rule_nr = 36;
3261b411b363SPhilipp Reisner 			}
3262b411b363SPhilipp Reisner 
3263b411b363SPhilipp Reisner 			return 1;
3264b411b363SPhilipp Reisner 		}
3265b411b363SPhilipp Reisner 
3266b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3267b411b363SPhilipp Reisner 
326844a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32694a23f264SPhilipp Reisner 				return -1091;
3270b411b363SPhilipp Reisner 
3271b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3272b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3273d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3274b411b363SPhilipp Reisner 
3275b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3276b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3277b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3278b411b363SPhilipp Reisner 
3279b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3280b411b363SPhilipp Reisner 				*rule_nr = 35;
3281b411b363SPhilipp Reisner 			} else {
3282d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3283b411b363SPhilipp Reisner 				*rule_nr = 37;
3284b411b363SPhilipp Reisner 			}
3285b411b363SPhilipp Reisner 
3286b411b363SPhilipp Reisner 			return -1;
3287b411b363SPhilipp Reisner 		}
3288b411b363SPhilipp Reisner 
3289b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3290b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3291b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3292b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3293b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3294b411b363SPhilipp Reisner 		*rule_nr = 40;
3295b411b363SPhilipp Reisner 
3296f2d3d75bSLars Ellenberg 		/* Neither has the "crashed primary" flag set,
3297f2d3d75bSLars Ellenberg 		 * only a replication link hickup. */
3298f2d3d75bSLars Ellenberg 		if (rct == 0)
3299f2d3d75bSLars Ellenberg 			return 0;
3300f2d3d75bSLars Ellenberg 
3301f2d3d75bSLars Ellenberg 		/* Current UUID equal and no bitmap uuid; does not necessarily
3302f2d3d75bSLars Ellenberg 		 * mean this was a "simultaneous hard crash", maybe IO was
3303f2d3d75bSLars Ellenberg 		 * frozen, so no UUID-bump happened.
3304f2d3d75bSLars Ellenberg 		 * This is a protocol change, overload DRBD_FF_WSAME as flag
3305f2d3d75bSLars Ellenberg 		 * for "new-enough" peer DRBD version. */
3306f2d3d75bSLars Ellenberg 		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3307f2d3d75bSLars Ellenberg 			*rule_nr = 41;
3308f2d3d75bSLars Ellenberg 			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3309f2d3d75bSLars Ellenberg 				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3310f2d3d75bSLars Ellenberg 				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3311f2d3d75bSLars Ellenberg 			}
3312f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3313f2d3d75bSLars Ellenberg 				/* At least one has the "crashed primary" bit set,
3314f2d3d75bSLars Ellenberg 				 * both are primary now, but neither has rotated its UUIDs?
3315f2d3d75bSLars Ellenberg 				 * "Can not happen." */
3316f2d3d75bSLars Ellenberg 				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3317f2d3d75bSLars Ellenberg 				return -100;
3318f2d3d75bSLars Ellenberg 			}
3319f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY)
3320f2d3d75bSLars Ellenberg 				return 1;
3321f2d3d75bSLars Ellenberg 			return -1;
3322f2d3d75bSLars Ellenberg 		}
3323f2d3d75bSLars Ellenberg 
3324f2d3d75bSLars Ellenberg 		/* Both are secondary.
3325f2d3d75bSLars Ellenberg 		 * Really looks like recovery from simultaneous hard crash.
3326f2d3d75bSLars Ellenberg 		 * Check which had been primary before, and arbitrate. */
3327b411b363SPhilipp Reisner 		switch (rct) {
3328f2d3d75bSLars Ellenberg 		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3329b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3330b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3331b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
333244a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3333b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3334b411b363SPhilipp Reisner 		}
3335b411b363SPhilipp Reisner 	}
3336b411b363SPhilipp Reisner 
3337b411b363SPhilipp Reisner 	*rule_nr = 50;
3338b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3339b411b363SPhilipp Reisner 	if (self == peer)
3340b411b363SPhilipp Reisner 		return -1;
3341b411b363SPhilipp Reisner 
3342b411b363SPhilipp Reisner 	*rule_nr = 51;
3343b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3344b411b363SPhilipp Reisner 	if (self == peer) {
334544a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3346b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3347b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3348b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3349b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3350b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3351b411b363SPhilipp Reisner 
335244a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33534a23f264SPhilipp Reisner 				return -1091;
3354b411b363SPhilipp Reisner 
3355b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3356b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
33574a23f264SPhilipp Reisner 
3358d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3359b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
33604a23f264SPhilipp Reisner 
3361b411b363SPhilipp Reisner 			return -1;
3362b411b363SPhilipp Reisner 		}
3363b411b363SPhilipp Reisner 	}
3364b411b363SPhilipp Reisner 
3365b411b363SPhilipp Reisner 	*rule_nr = 60;
3366b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3367b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3368b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3369b411b363SPhilipp Reisner 		if (self == peer)
3370b411b363SPhilipp Reisner 			return -2;
3371b411b363SPhilipp Reisner 	}
3372b411b363SPhilipp Reisner 
3373b411b363SPhilipp Reisner 	*rule_nr = 70;
3374b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3375b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3376b411b363SPhilipp Reisner 	if (self == peer)
3377b411b363SPhilipp Reisner 		return 1;
3378b411b363SPhilipp Reisner 
3379b411b363SPhilipp Reisner 	*rule_nr = 71;
3380b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3381b411b363SPhilipp Reisner 	if (self == peer) {
338244a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3383b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3384b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3385b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3386b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3387b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3388b411b363SPhilipp Reisner 
338944a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33904a23f264SPhilipp Reisner 				return -1091;
3391b411b363SPhilipp Reisner 
3392b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3393b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3394b411b363SPhilipp Reisner 
3395d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3396b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3397b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3398b411b363SPhilipp Reisner 
3399b411b363SPhilipp Reisner 			return 1;
3400b411b363SPhilipp Reisner 		}
3401b411b363SPhilipp Reisner 	}
3402b411b363SPhilipp Reisner 
3403b411b363SPhilipp Reisner 
3404b411b363SPhilipp Reisner 	*rule_nr = 80;
3405b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3406b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3407b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3408b411b363SPhilipp Reisner 		if (self == peer)
3409b411b363SPhilipp Reisner 			return 2;
3410b411b363SPhilipp Reisner 	}
3411b411b363SPhilipp Reisner 
3412b411b363SPhilipp Reisner 	*rule_nr = 90;
3413b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3414b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3415b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3416b411b363SPhilipp Reisner 		return 100;
3417b411b363SPhilipp Reisner 
3418b411b363SPhilipp Reisner 	*rule_nr = 100;
3419b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3420b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3421b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3422b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3423b411b363SPhilipp Reisner 			if (self == peer)
3424b411b363SPhilipp Reisner 				return -100;
3425b411b363SPhilipp Reisner 		}
3426b411b363SPhilipp Reisner 	}
3427b411b363SPhilipp Reisner 
3428b411b363SPhilipp Reisner 	return -1000;
3429b411b363SPhilipp Reisner }
3430b411b363SPhilipp Reisner 
3431b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3432b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3433b411b363SPhilipp Reisner  */
343469a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
343569a22773SAndreas Gruenbacher 					   enum drbd_role peer_role,
3436b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
3437b411b363SPhilipp Reisner {
343869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3439b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
3440b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
344144ed167dSPhilipp Reisner 	struct net_conf *nc;
3442d29e89e3SRoland Kammerer 	int hg, rule_nr, rr_conflict, tentative, always_asbp;
3443b411b363SPhilipp Reisner 
3444b30ab791SAndreas Gruenbacher 	mydisk = device->state.disk;
3445b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
3446b30ab791SAndreas Gruenbacher 		mydisk = device->new_state_tmp.disk;
3447b411b363SPhilipp Reisner 
3448d0180171SAndreas Gruenbacher 	drbd_info(device, "drbd_sync_handshake:\n");
34499f2247bbSPhilipp Reisner 
3450b30ab791SAndreas Gruenbacher 	spin_lock_irq(&device->ldev->md.uuid_lock);
3451b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3452b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "peer", device->p_uuid,
3453b30ab791SAndreas Gruenbacher 		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3454b411b363SPhilipp Reisner 
3455f2d3d75bSLars Ellenberg 	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
3456b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->ldev->md.uuid_lock);
3457b411b363SPhilipp Reisner 
3458d0180171SAndreas Gruenbacher 	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3459b411b363SPhilipp Reisner 
3460b411b363SPhilipp Reisner 	if (hg == -1000) {
3461d0180171SAndreas Gruenbacher 		drbd_alert(device, "Unrelated data, aborting!\n");
3462b411b363SPhilipp Reisner 		return C_MASK;
3463b411b363SPhilipp Reisner 	}
3464f2d3d75bSLars Ellenberg 	if (hg < -0x10000) {
3465f2d3d75bSLars Ellenberg 		int proto, fflags;
3466f2d3d75bSLars Ellenberg 		hg = -hg;
3467f2d3d75bSLars Ellenberg 		proto = hg & 0xff;
3468f2d3d75bSLars Ellenberg 		fflags = (hg >> 8) & 0xff;
3469f2d3d75bSLars Ellenberg 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3470f2d3d75bSLars Ellenberg 					proto, fflags);
3471f2d3d75bSLars Ellenberg 		return C_MASK;
3472f2d3d75bSLars Ellenberg 	}
34734a23f264SPhilipp Reisner 	if (hg < -1000) {
3474d0180171SAndreas Gruenbacher 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3475b411b363SPhilipp Reisner 		return C_MASK;
3476b411b363SPhilipp Reisner 	}
3477b411b363SPhilipp Reisner 
3478b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3479b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
3480b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
3481b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
3482b411b363SPhilipp Reisner 		if (f)
3483b411b363SPhilipp Reisner 			hg = hg*2;
3484d0180171SAndreas Gruenbacher 		drbd_info(device, "Becoming sync %s due to disk states.\n",
3485b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
3486b411b363SPhilipp Reisner 	}
3487b411b363SPhilipp Reisner 
34883a11a487SAdam Gandelman 	if (abs(hg) == 100)
3489b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "initial-split-brain");
34903a11a487SAdam Gandelman 
349144ed167dSPhilipp Reisner 	rcu_read_lock();
349269a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
3493d29e89e3SRoland Kammerer 	always_asbp = nc->always_asbp;
3494d29e89e3SRoland Kammerer 	rr_conflict = nc->rr_conflict;
3495d29e89e3SRoland Kammerer 	tentative = nc->tentative;
3496d29e89e3SRoland Kammerer 	rcu_read_unlock();
349744ed167dSPhilipp Reisner 
3498d29e89e3SRoland Kammerer 	if (hg == 100 || (hg == -100 && always_asbp)) {
3499b30ab791SAndreas Gruenbacher 		int pcount = (device->state.role == R_PRIMARY)
3500b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
3501b411b363SPhilipp Reisner 		int forced = (hg == -100);
3502b411b363SPhilipp Reisner 
3503b411b363SPhilipp Reisner 		switch (pcount) {
3504b411b363SPhilipp Reisner 		case 0:
350569a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_0p(peer_device);
3506b411b363SPhilipp Reisner 			break;
3507b411b363SPhilipp Reisner 		case 1:
350869a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_1p(peer_device);
3509b411b363SPhilipp Reisner 			break;
3510b411b363SPhilipp Reisner 		case 2:
351169a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_2p(peer_device);
3512b411b363SPhilipp Reisner 			break;
3513b411b363SPhilipp Reisner 		}
3514b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
3515d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, %d primaries, "
3516b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3517b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3518b411b363SPhilipp Reisner 			if (forced) {
3519d0180171SAndreas Gruenbacher 				drbd_warn(device, "Doing a full sync, since"
3520b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3521b411b363SPhilipp Reisner 				hg = hg*2;
3522b411b363SPhilipp Reisner 			}
3523b411b363SPhilipp Reisner 		}
3524b411b363SPhilipp Reisner 	}
3525b411b363SPhilipp Reisner 
3526b411b363SPhilipp Reisner 	if (hg == -100) {
3527b30ab791SAndreas Gruenbacher 		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3528b411b363SPhilipp Reisner 			hg = -1;
3529b30ab791SAndreas Gruenbacher 		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3530b411b363SPhilipp Reisner 			hg = 1;
3531b411b363SPhilipp Reisner 
3532b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3533d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, manually solved. "
3534b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3535b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3536b411b363SPhilipp Reisner 	}
3537b411b363SPhilipp Reisner 
3538b411b363SPhilipp Reisner 	if (hg == -100) {
3539580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3540580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3541580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3542580b9767SLars Ellenberg 		 * to that disk, in a way... */
3543d0180171SAndreas Gruenbacher 		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3544b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "split-brain");
3545b411b363SPhilipp Reisner 		return C_MASK;
3546b411b363SPhilipp Reisner 	}
3547b411b363SPhilipp Reisner 
3548b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3549d0180171SAndreas Gruenbacher 		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3550b411b363SPhilipp Reisner 		return C_MASK;
3551b411b363SPhilipp Reisner 	}
3552b411b363SPhilipp Reisner 
3553b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3554b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
355544ed167dSPhilipp Reisner 		switch (rr_conflict) {
3556b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3557b30ab791SAndreas Gruenbacher 			drbd_khelper(device, "pri-lost");
3558df561f66SGustavo A. R. Silva 			fallthrough;
3559b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3560d0180171SAndreas Gruenbacher 			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3561b411b363SPhilipp Reisner 			return C_MASK;
3562b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
3563d0180171SAndreas Gruenbacher 			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3564b411b363SPhilipp Reisner 			     "assumption\n");
3565b411b363SPhilipp Reisner 		}
3566b411b363SPhilipp Reisner 	}
3567b411b363SPhilipp Reisner 
356869a22773SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3569cf14c2e9SPhilipp Reisner 		if (hg == 0)
3570d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3571cf14c2e9SPhilipp Reisner 		else
3572d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3573cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3574cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3575cf14c2e9SPhilipp Reisner 		return C_MASK;
3576cf14c2e9SPhilipp Reisner 	}
3577cf14c2e9SPhilipp Reisner 
3578b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3579d0180171SAndreas Gruenbacher 		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3580b30ab791SAndreas Gruenbacher 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
358120ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3582b411b363SPhilipp Reisner 			return C_MASK;
3583b411b363SPhilipp Reisner 	}
3584b411b363SPhilipp Reisner 
3585b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3586b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3587b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3588b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3589b411b363SPhilipp Reisner 	} else {
3590b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3591b30ab791SAndreas Gruenbacher 		if (drbd_bm_total_weight(device)) {
3592d0180171SAndreas Gruenbacher 			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3593b30ab791SAndreas Gruenbacher 			     drbd_bm_total_weight(device));
3594b411b363SPhilipp Reisner 		}
3595b411b363SPhilipp Reisner 	}
3596b411b363SPhilipp Reisner 
3597b411b363SPhilipp Reisner 	return rv;
3598b411b363SPhilipp Reisner }
3599b411b363SPhilipp Reisner 
3600f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3601b411b363SPhilipp Reisner {
3602b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3603f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3604f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3605b411b363SPhilipp Reisner 
3606b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3607f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3608f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3609b411b363SPhilipp Reisner 
3610b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3611f179d76dSPhilipp Reisner 	return peer;
3612b411b363SPhilipp Reisner }
3613b411b363SPhilipp Reisner 
/* Handle a P_PROTOCOL (or P_PROTOCOL_UPDATE) packet from the peer.
 *
 * Reads the peer's wire-protocol settings, verifies them against our own
 * net_conf (unless this is a P_PROTOCOL_UPDATE, where the checks are
 * skipped), allocates a new peer data-integrity transform if requested,
 * and publishes an updated net_conf via RCU.
 *
 * Returns 0 on success, or a negative error code; on any incompatibility
 * the connection is forced towards C_DISCONNECTING and -EIO is returned.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* All on-the-wire fields arrive in big-endian order. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the packet carries the peer's integrity
	 * algorithm name as trailing payload; read and NUL-terminate it. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* For the initial P_PROTOCOL packet, cross-check every setting
	 * against our local configuration; a P_PROTOCOL_UPDATE only
	 * changes the integrity algorithm, so the checks are skipped. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* The after-sb policies are expressed from each node's own
		 * point of view; convert before comparing. */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* Both sides claiming "discard my data" is ambiguous. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			/* Reset so the cleanup path does not free an ERR_PTR. */
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Digest scratch buffers, sized for the chosen algorithm. */
		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf)
		goto disconnect;

	/* Publish the updated configuration.  data.mutex serializes against
	 * the sender; conf_update serializes against other reconfiguration. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity transform and buffers, releasing the
	 * previous ones (crypto_free_shash/kfree accept NULL). */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Readers may still hold the old conf; free it after a grace period. */
	kvfree_rcu(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3761b411b363SPhilipp Reisner 
3762b411b363SPhilipp Reisner /* helper function
3763b411b363SPhilipp Reisner  * input: alg name, feature name
3764b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3765b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3766b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
37673d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
37683d0e6375SKees Cook 		const struct drbd_device *device,
3769b411b363SPhilipp Reisner 		const char *alg, const char *name)
3770b411b363SPhilipp Reisner {
37713d0e6375SKees Cook 	struct crypto_shash *tfm;
3772b411b363SPhilipp Reisner 
3773b411b363SPhilipp Reisner 	if (!alg[0])
3774b411b363SPhilipp Reisner 		return NULL;
3775b411b363SPhilipp Reisner 
37763d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3777b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3778d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3779b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3780b411b363SPhilipp Reisner 		return tfm;
3781b411b363SPhilipp Reisner 	}
3782b411b363SPhilipp Reisner 	return tfm;
3783b411b363SPhilipp Reisner }
3784b411b363SPhilipp Reisner 
3785bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3786b411b363SPhilipp Reisner {
3787bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
37884a76b161SAndreas Gruenbacher 	int size = pi->size;
37894a76b161SAndreas Gruenbacher 
37904a76b161SAndreas Gruenbacher 	while (size) {
37914a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3792bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
37934a76b161SAndreas Gruenbacher 		if (s <= 0) {
37944a76b161SAndreas Gruenbacher 			if (s < 0)
37954a76b161SAndreas Gruenbacher 				return s;
37964a76b161SAndreas Gruenbacher 			break;
37974a76b161SAndreas Gruenbacher 		}
37984a76b161SAndreas Gruenbacher 		size -= s;
37994a76b161SAndreas Gruenbacher 	}
38004a76b161SAndreas Gruenbacher 	if (size)
38014a76b161SAndreas Gruenbacher 		return -EIO;
38024a76b161SAndreas Gruenbacher 	return 0;
38034a76b161SAndreas Gruenbacher }
38044a76b161SAndreas Gruenbacher 
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Drain the packet's payload so the receive stream stays in sync. */
	return ignore_remaining_packet(connection, pi);
}
38224a76b161SAndreas Gruenbacher 
3823bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38244a76b161SAndreas Gruenbacher {
38259f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3826b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3827e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3828b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38293d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38303d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38312ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3832813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3833bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3834813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38356a365874SStephen Kitt 	unsigned int fifo_size = 0;
383682bc0194SAndreas Gruenbacher 	int err;
3837b411b363SPhilipp Reisner 
38389f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38399f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3840bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38419f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3842b411b363SPhilipp Reisner 
3843b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3844b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3845b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
38468e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
38478e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3848b411b363SPhilipp Reisner 
3849e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3850d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3851e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
385282bc0194SAndreas Gruenbacher 		return -EIO;
3853b411b363SPhilipp Reisner 	}
3854b411b363SPhilipp Reisner 
3855b411b363SPhilipp Reisner 	if (apv <= 88) {
3856e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3857e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38588e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3859e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3860e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38610b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
38628e26f9ccSPhilipp Reisner 	} else {
3863e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3864e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38650b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3866b411b363SPhilipp Reisner 	}
3867b411b363SPhilipp Reisner 
3868b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3869e658983aSAndreas Gruenbacher 	p = pi->data;
387052a0cab3SKees Cook 	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
387152a0cab3SKees Cook 	memset(&p->algs, 0, sizeof(p->algs));
3872b411b363SPhilipp Reisner 
38739f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
387482bc0194SAndreas Gruenbacher 	if (err)
387582bc0194SAndreas Gruenbacher 		return err;
3876b411b363SPhilipp Reisner 
38770500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
38789f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3879b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3880daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3881daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3882b30ab791SAndreas Gruenbacher 			put_ldev(device);
38830500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3884d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3885daeda1ccSPhilipp Reisner 			return -ENOMEM;
3886f399002eSLars Ellenberg 		}
3887b411b363SPhilipp Reisner 
3888b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3889daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3890daeda1ccSPhilipp Reisner 
38916394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3892813472ceSPhilipp Reisner 	}
3893b411b363SPhilipp Reisner 
3894b411b363SPhilipp Reisner 	if (apv >= 88) {
3895b411b363SPhilipp Reisner 		if (apv == 88) {
38965de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3897d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
38985de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3899b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3900813472ceSPhilipp Reisner 				goto reconnect;
3901b411b363SPhilipp Reisner 			}
3902b411b363SPhilipp Reisner 
39039f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3904813472ceSPhilipp Reisner 			if (err)
3905813472ceSPhilipp Reisner 				goto reconnect;
3906b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3907b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39080b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3909b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3910b411b363SPhilipp Reisner 
3911b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3912b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3913b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39140b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39150b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3916b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3917b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3918b411b363SPhilipp Reisner 		}
3919b411b363SPhilipp Reisner 
39202ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3921b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3922d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39232ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3924b411b363SPhilipp Reisner 				goto disconnect;
3925b411b363SPhilipp Reisner 			}
3926b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3927b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3928b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3929b411b363SPhilipp Reisner 				verify_tfm = NULL;
3930b411b363SPhilipp Reisner 				goto disconnect;
3931b411b363SPhilipp Reisner 			}
3932b411b363SPhilipp Reisner 		}
3933b411b363SPhilipp Reisner 
39342ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3935b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3936d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39372ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3938b411b363SPhilipp Reisner 				goto disconnect;
3939b411b363SPhilipp Reisner 			}
3940b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3941b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3942b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3943b411b363SPhilipp Reisner 				csums_tfm = NULL;
3944b411b363SPhilipp Reisner 				goto disconnect;
3945b411b363SPhilipp Reisner 			}
3946b411b363SPhilipp Reisner 		}
3947b411b363SPhilipp Reisner 
3948813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3949daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3950daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3951daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3952daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3953778f271dSPhilipp Reisner 
3954daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3955b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3956813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3957813472ceSPhilipp Reisner 				if (!new_plan) {
3958d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3959b30ab791SAndreas Gruenbacher 					put_ldev(device);
3960778f271dSPhilipp Reisner 					goto disconnect;
3961778f271dSPhilipp Reisner 				}
3962778f271dSPhilipp Reisner 			}
39638e26f9ccSPhilipp Reisner 		}
3964b411b363SPhilipp Reisner 
396591fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
39662ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
39678404e191SZhen Lei 			if (!new_net_conf)
396891fd4dadSPhilipp Reisner 				goto disconnect;
396991fd4dadSPhilipp Reisner 
39702ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
397191fd4dadSPhilipp Reisner 
3972b411b363SPhilipp Reisner 			if (verify_tfm) {
39732ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
39742ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
39753d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
39769f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
3977d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3978b411b363SPhilipp Reisner 			}
3979b411b363SPhilipp Reisner 			if (csums_tfm) {
39802ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
39812ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
39823d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
39839f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
3984d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3985b411b363SPhilipp Reisner 			}
3986bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
3987778f271dSPhilipp Reisner 		}
3988b411b363SPhilipp Reisner 	}
3989b411b363SPhilipp Reisner 
3990813472ceSPhilipp Reisner 	if (new_disk_conf) {
3991b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3992b30ab791SAndreas Gruenbacher 		put_ldev(device);
3993b411b363SPhilipp Reisner 	}
3994813472ceSPhilipp Reisner 
3995813472ceSPhilipp Reisner 	if (new_plan) {
3996b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
3997b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
3998813472ceSPhilipp Reisner 	}
3999daeda1ccSPhilipp Reisner 
40000500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4001daeda1ccSPhilipp Reisner 	synchronize_rcu();
4002daeda1ccSPhilipp Reisner 	if (new_net_conf)
4003daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4004daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4005813472ceSPhilipp Reisner 	kfree(old_plan);
4006daeda1ccSPhilipp Reisner 
400782bc0194SAndreas Gruenbacher 	return 0;
4008b411b363SPhilipp Reisner 
4009813472ceSPhilipp Reisner reconnect:
4010813472ceSPhilipp Reisner 	if (new_disk_conf) {
4011b30ab791SAndreas Gruenbacher 		put_ldev(device);
4012813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4013813472ceSPhilipp Reisner 	}
40140500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4015813472ceSPhilipp Reisner 	return -EIO;
4016813472ceSPhilipp Reisner 
4017b411b363SPhilipp Reisner disconnect:
4018813472ceSPhilipp Reisner 	kfree(new_plan);
4019813472ceSPhilipp Reisner 	if (new_disk_conf) {
4020b30ab791SAndreas Gruenbacher 		put_ldev(device);
4021813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4022813472ceSPhilipp Reisner 	}
40230500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4024b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4025b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40263d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4027b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40283d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40299f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
403082bc0194SAndreas Gruenbacher 	return -EIO;
4031b411b363SPhilipp Reisner }
4032b411b363SPhilipp Reisner 
4033b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4034b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4035b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4036b411b363SPhilipp Reisner {
4037b411b363SPhilipp Reisner 	sector_t d;
4038b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4039b411b363SPhilipp Reisner 		return;
4040b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4041b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4042d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4043b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4044b411b363SPhilipp Reisner }
4045b411b363SPhilipp Reisner 
/*
 * receive_sizes() - process the peer's P_SIZES packet
 *
 * The peer reports its backing disk size (d_size), the user-requested size
 * limit (u_size) and the size it currently exposes (c_size).  Decide whether
 * we accept those, possibly resize our own device, and trigger a resync
 * after an online grow.  Returns 0, or a negative error code (the
 * connection is torn down on an unacceptable shrink).
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* The peer's queue limits are only valid if WSAME was negotiated. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = get_capacity(device->vdisk);

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			/* The peer changed the user size limit; publish an
			 * updated disk_conf via RCU. */
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			kvfree_rcu(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore, peer does not know nothing. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	if (get_ldev(device)) {
		/* remember the current size of the backing device */
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
		    ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4216b411b363SPhilipp Reisner 
4217bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4218b411b363SPhilipp Reisner {
42199f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4220b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4221e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4222b411b363SPhilipp Reisner 	u64 *p_uuid;
422362b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4224b411b363SPhilipp Reisner 
42259f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42269f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4227bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42289f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42294a76b161SAndreas Gruenbacher 
4230365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42318404e191SZhen Lei 	if (!p_uuid)
4232063eacf8SJing Wang 		return false;
4233b411b363SPhilipp Reisner 
4234b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4235b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4236b411b363SPhilipp Reisner 
4237b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4238b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4239b411b363SPhilipp Reisner 
4240b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4241b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4242b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4243b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4244d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4245b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
42469f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
424782bc0194SAndreas Gruenbacher 		return -EIO;
4248b411b363SPhilipp Reisner 	}
4249b411b363SPhilipp Reisner 
4250b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4251b411b363SPhilipp Reisner 		int skip_initial_sync =
4252b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
42539f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4254b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4255b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4256b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4257d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4258b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
425920ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
426020ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4261b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4262b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4263b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4264b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4265b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
426662b0da3aSLars Ellenberg 			updated_uuids = 1;
4267b411b363SPhilipp Reisner 		}
4268b30ab791SAndreas Gruenbacher 		put_ldev(device);
4269b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4270b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
427118a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
427218a50fa2SPhilipp Reisner 		   for me. */
4273b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4274b411b363SPhilipp Reisner 	}
4275b411b363SPhilipp Reisner 
4276b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4277b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4278b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4279b411b363SPhilipp Reisner 	   new disk state... */
4280b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4281b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4282b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4283b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
428462b0da3aSLars Ellenberg 
428562b0da3aSLars Ellenberg 	if (updated_uuids)
4286b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4287b411b363SPhilipp Reisner 
428882bc0194SAndreas Gruenbacher 	return 0;
4289b411b363SPhilipp Reisner }
4290b411b363SPhilipp Reisner 
4291b411b363SPhilipp Reisner /**
4292b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4293b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4294b411b363SPhilipp Reisner  */
4295b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4296b411b363SPhilipp Reisner {
4297b411b363SPhilipp Reisner 	union drbd_state ms;
4298b411b363SPhilipp Reisner 
4299b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4300369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4301b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4302b411b363SPhilipp Reisner 
4303b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4304b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4305b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4306b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4307b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4308b411b363SPhilipp Reisner 	};
4309b411b363SPhilipp Reisner 
4310b411b363SPhilipp Reisner 	ms.i = ps.i;
4311b411b363SPhilipp Reisner 
4312b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4313b411b363SPhilipp Reisner 	ms.peer = ps.role;
4314b411b363SPhilipp Reisner 	ms.role = ps.peer;
4315b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4316b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4317b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4318b411b363SPhilipp Reisner 
4319b411b363SPhilipp Reisner 	return ms;
4320b411b363SPhilipp Reisner }
4321b411b363SPhilipp Reisner 
/*
 * receive_req_state() - handle a peer's request to change a device's state
 *
 * The mask/val pair arrives in the peer's point of view and is translated
 * by convert_state() before being applied.  If this node is the one that
 * resolves conflicts and a local state change is already in progress
 * (state_mutex held), the request is refused with SS_CONCURRENT_ST_CHG
 * rather than applied.  The outcome is always reported back to the peer
 * via drbd_send_sr_reply().  Returns 0, or -EIO if no peer device exists
 * for the requested volume.
 */
4322bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4323b411b363SPhilipp Reisner {
43249f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4325b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4326e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4327b411b363SPhilipp Reisner 	union drbd_state mask, val;
4328bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4329b411b363SPhilipp Reisner 
43309f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43319f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43324a76b161SAndreas Gruenbacher 		return -EIO;
43339f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43344a76b161SAndreas Gruenbacher 
4335b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4336b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4337b411b363SPhilipp Reisner 
	/* concurrent local state change in flight: we win, peer must retry */
43389f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4339b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
434069a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
434182bc0194SAndreas Gruenbacher 		return 0;
4342b411b363SPhilipp Reisner 	}
4343b411b363SPhilipp Reisner 
4344b411b363SPhilipp Reisner 	mask = convert_state(mask);
4345b411b363SPhilipp Reisner 	val = convert_state(val);
4346b411b363SPhilipp Reisner 
4347b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
434869a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4349047cd4a6SPhilipp Reisner 
4350b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4351b411b363SPhilipp Reisner 
435282bc0194SAndreas Gruenbacher 	return 0;
4353b411b363SPhilipp Reisner }
4354b411b363SPhilipp Reisner 
/*
 * receive_req_conn_state() - handle a peer's request to change the
 * connection-wide state
 *
 * Connection-level counterpart of receive_req_state(): the concurrency
 * check uses cstate_mutex, and the change is applied with
 * conn_request_state() restricted to this node (CS_LOCAL_ONLY).  The
 * result is reported back via conn_send_sr_reply().  Always returns 0.
 */
4355bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4356b411b363SPhilipp Reisner {
4357e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4358dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4359dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4360dfafcc8aSPhilipp Reisner 
4361dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4362dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4363dfafcc8aSPhilipp Reisner 
	/* concurrent local cstate change in flight: we win, peer must retry */
4364bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4365bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4366bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
436782bc0194SAndreas Gruenbacher 		return 0;
4368dfafcc8aSPhilipp Reisner 	}
4369dfafcc8aSPhilipp Reisner 
4370dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4371dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4372dfafcc8aSPhilipp Reisner 
4373bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4374bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4375dfafcc8aSPhilipp Reisner 
437682bc0194SAndreas Gruenbacher 	return 0;
4377dfafcc8aSPhilipp Reisner }
4378dfafcc8aSPhilipp Reisner 
/*
 * receive_state() - process a P_STATE packet carrying the peer's current state
 *
 * Translates the peer's view into ours (role/peer and disk/pdsk swapped),
 * decides whether a resync handshake is needed, and applies the resulting
 * transition to our state under req_lock, retrying if our state changed
 * concurrently.  Returns 0 on success, -ECONNRESET if the connection is
 * already being torn down, or -EIO on fatal inconsistencies.
 */
4379bde89a9eSAndreas Gruenbacher static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
4380b411b363SPhilipp Reisner {
43819f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4382b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4383e658983aSAndreas Gruenbacher 	struct p_state *p = pi->data;
43844ac4aadaSLars Ellenberg 	union drbd_state os, ns, peer_state;
4385b411b363SPhilipp Reisner 	enum drbd_disk_state real_peer_disk;
438665d922c3SPhilipp Reisner 	enum chg_state_flags cs_flags;
4387b411b363SPhilipp Reisner 	int rv;
4388b411b363SPhilipp Reisner 
43899f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43909f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4391bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
43929f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43934a76b161SAndreas Gruenbacher 
4394b411b363SPhilipp Reisner 	peer_state.i = be32_to_cpu(p->state);
4395b411b363SPhilipp Reisner 
4396b411b363SPhilipp Reisner 	real_peer_disk = peer_state.disk;
4397b411b363SPhilipp Reisner 	if (peer_state.disk == D_NEGOTIATING) {
		/* resolve D_NEGOTIATING from the peer's UUID flags (bit 2) */
4398b30ab791SAndreas Gruenbacher 		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
4399d0180171SAndreas Gruenbacher 		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
4400b411b363SPhilipp Reisner 	}
4401b411b363SPhilipp Reisner 
	/* snapshot our state; re-sampled via "goto retry" below if it
	 * changed while req_lock was dropped */
44020500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
4403b411b363SPhilipp Reisner  retry:
4404b30ab791SAndreas Gruenbacher 	os = ns = drbd_read_state(device);
44050500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
4406b411b363SPhilipp Reisner 
4407668700b4SPhilipp Reisner 	/* If some other part of the code (ack_receiver thread, timeout)
4408545752d5SLars Ellenberg 	 * already decided to close the connection again,
4409545752d5SLars Ellenberg 	 * we must not "re-establish" it here. */
4410545752d5SLars Ellenberg 	if (os.conn <= C_TEAR_DOWN)
441158ffa580SLars Ellenberg 		return -ECONNRESET;
4412545752d5SLars Ellenberg 
441340424e4aSLars Ellenberg 	/* If this is the "end of sync" confirmation, usually the peer disk
441440424e4aSLars Ellenberg 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
441540424e4aSLars Ellenberg 	 * set) resync started in PausedSyncT, or if the timing of pause-/
441640424e4aSLars Ellenberg 	 * unpause-sync events has been "just right", the peer disk may
441740424e4aSLars Ellenberg 	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
441840424e4aSLars Ellenberg 	 */
441940424e4aSLars Ellenberg 	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
442040424e4aSLars Ellenberg 	    real_peer_disk == D_UP_TO_DATE &&
4421e9ef7bb6SLars Ellenberg 	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4422e9ef7bb6SLars Ellenberg 		/* If we are (becoming) SyncSource, but peer is still in sync
4423e9ef7bb6SLars Ellenberg 		 * preparation, ignore its uptodate-ness to avoid flapping, it
4424e9ef7bb6SLars Ellenberg 		 * will change to inconsistent once the peer reaches active
4425e9ef7bb6SLars Ellenberg 		 * syncing states.
4426e9ef7bb6SLars Ellenberg 		 * It may have changed syncer-paused flags, however, so we
4427e9ef7bb6SLars Ellenberg 		 * cannot ignore this completely. */
4428e9ef7bb6SLars Ellenberg 		if (peer_state.conn > C_CONNECTED &&
4429e9ef7bb6SLars Ellenberg 		    peer_state.conn < C_SYNC_SOURCE)
4430e9ef7bb6SLars Ellenberg 			real_peer_disk = D_INCONSISTENT;
4431e9ef7bb6SLars Ellenberg 
4432e9ef7bb6SLars Ellenberg 		/* if peer_state changes to connected at the same time,
4433e9ef7bb6SLars Ellenberg 		 * it explicitly notifies us that it finished resync.
4434e9ef7bb6SLars Ellenberg 		 * Maybe we should finish it up, too? */
4435e9ef7bb6SLars Ellenberg 		else if (os.conn >= C_SYNC_SOURCE &&
4436e9ef7bb6SLars Ellenberg 			 peer_state.conn == C_CONNECTED) {
4437b30ab791SAndreas Gruenbacher 			if (drbd_bm_total_weight(device) <= device->rs_failed)
4438b30ab791SAndreas Gruenbacher 				drbd_resync_finished(device);
443982bc0194SAndreas Gruenbacher 			return 0;
4440e9ef7bb6SLars Ellenberg 		}
4441e9ef7bb6SLars Ellenberg 	}
4442e9ef7bb6SLars Ellenberg 
444302b91b55SLars Ellenberg 	/* explicit verify finished notification, stop sector reached. */
444402b91b55SLars Ellenberg 	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
444502b91b55SLars Ellenberg 	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
4446b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
4447b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
444858ffa580SLars Ellenberg 		return 0;
444902b91b55SLars Ellenberg 	}
445002b91b55SLars Ellenberg 
4451e9ef7bb6SLars Ellenberg 	/* peer says his disk is inconsistent, while we think it is uptodate,
4452e9ef7bb6SLars Ellenberg 	 * and this happens while the peer still thinks we have a sync going on,
4453e9ef7bb6SLars Ellenberg 	 * but we think we are already done with the sync.
4454e9ef7bb6SLars Ellenberg 	 * We ignore this to avoid flapping pdsk.
4455e9ef7bb6SLars Ellenberg 	 * This should not happen, if the peer is a recent version of drbd. */
4456e9ef7bb6SLars Ellenberg 	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4457e9ef7bb6SLars Ellenberg 	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4458e9ef7bb6SLars Ellenberg 		real_peer_disk = D_UP_TO_DATE;
4459e9ef7bb6SLars Ellenberg 
44604ac4aadaSLars Ellenberg 	if (ns.conn == C_WF_REPORT_PARAMS)
44614ac4aadaSLars Ellenberg 		ns.conn = C_CONNECTED;
4462b411b363SPhilipp Reisner 
446367531718SPhilipp Reisner 	if (peer_state.conn == C_AHEAD)
446467531718SPhilipp Reisner 		ns.conn = C_BEHIND;
446567531718SPhilipp Reisner 
4466fe43ed97SLars Ellenberg 	/* TODO:
4467fe43ed97SLars Ellenberg 	 * if (primary and diskless and peer uuid != effective uuid)
4468fe43ed97SLars Ellenberg 	 *     abort attach on peer;
4469fe43ed97SLars Ellenberg 	 *
4470fe43ed97SLars Ellenberg 	 * If this node does not have good data, was already connected, but
4471fe43ed97SLars Ellenberg 	 * the peer did a late attach only now, trying to "negotiate" with me,
4472fe43ed97SLars Ellenberg 	 * AND I am currently Primary, possibly frozen, with some specific
4473fe43ed97SLars Ellenberg 	 * "effective" uuid, this should never be reached, really, because
4474fe43ed97SLars Ellenberg 	 * we first send the uuids, then the current state.
4475fe43ed97SLars Ellenberg 	 *
4476fe43ed97SLars Ellenberg 	 * In this scenario, we already dropped the connection hard
4477fe43ed97SLars Ellenberg 	 * when we received the unsuitable uuids (receive_uuids()).
4478fe43ed97SLars Ellenberg 	 *
4479fe43ed97SLars Ellenberg 	 * Should we want to change this, that is: not drop the connection in
4480fe43ed97SLars Ellenberg 	 * receive_uuids() already, then we would need to add a branch here
4481fe43ed97SLars Ellenberg 	 * that aborts the attach of "unsuitable uuids" on the peer in case
4482fe43ed97SLars Ellenberg 	 * this node is currently Diskless Primary.
4483fe43ed97SLars Ellenberg 	 */
4484fe43ed97SLars Ellenberg 
4485b30ab791SAndreas Gruenbacher 	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4486b30ab791SAndreas Gruenbacher 	    get_ldev_if_state(device, D_NEGOTIATING)) {
4487b411b363SPhilipp Reisner 		int cr; /* consider resync */
4488b411b363SPhilipp Reisner 
4489b411b363SPhilipp Reisner 		/* if we established a new connection */
44904ac4aadaSLars Ellenberg 		cr  = (os.conn < C_CONNECTED);
4491b411b363SPhilipp Reisner 		/* if we had an established connection
4492b411b363SPhilipp Reisner 		 * and one of the nodes newly attaches a disk */
44934ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
4494b411b363SPhilipp Reisner 		       (peer_state.disk == D_NEGOTIATING ||
44954ac4aadaSLars Ellenberg 			os.disk == D_NEGOTIATING));
4496b411b363SPhilipp Reisner 		/* if we have both been inconsistent, and the peer has been
4497a2823ea9SLars Ellenberg 		 * forced to be UpToDate with --force */
4498b30ab791SAndreas Gruenbacher 		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
4499b411b363SPhilipp Reisner 		/* if we had been plain connected, and the admin requested to
4500b411b363SPhilipp Reisner 		 * start a sync by "invalidate" or "invalidate-remote" */
45014ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
4502b411b363SPhilipp Reisner 				(peer_state.conn >= C_STARTING_SYNC_S &&
4503b411b363SPhilipp Reisner 				 peer_state.conn <= C_WF_BITMAP_T));
4504b411b363SPhilipp Reisner 
4505b411b363SPhilipp Reisner 		if (cr)
450669a22773SAndreas Gruenbacher 			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
4507b411b363SPhilipp Reisner 
4508b30ab791SAndreas Gruenbacher 		put_ldev(device);
		/* C_MASK: the handshake could not agree on a sync direction */
45094ac4aadaSLars Ellenberg 		if (ns.conn == C_MASK) {
45104ac4aadaSLars Ellenberg 			ns.conn = C_CONNECTED;
4511b30ab791SAndreas Gruenbacher 			if (device->state.disk == D_NEGOTIATING) {
4512b30ab791SAndreas Gruenbacher 				drbd_force_state(device, NS(disk, D_FAILED));
4513b411b363SPhilipp Reisner 			} else if (peer_state.disk == D_NEGOTIATING) {
4514d0180171SAndreas Gruenbacher 				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
4515b411b363SPhilipp Reisner 				peer_state.disk = D_DISKLESS;
4516580b9767SLars Ellenberg 				real_peer_disk = D_DISKLESS;
4517b411b363SPhilipp Reisner 			} else {
45189f4fe9adSAndreas Gruenbacher 				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
451982bc0194SAndreas Gruenbacher 					return -EIO;
45200b0ba1efSAndreas Gruenbacher 				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
45219f4fe9adSAndreas Gruenbacher 				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
452282bc0194SAndreas Gruenbacher 				return -EIO;
4523b411b363SPhilipp Reisner 			}
4524b411b363SPhilipp Reisner 		}
4525b411b363SPhilipp Reisner 	}
4526b411b363SPhilipp Reisner 
45270500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
4528b30ab791SAndreas Gruenbacher 	if (os.i != drbd_read_state(device).i)
4529b411b363SPhilipp Reisner 		goto retry;
4530b30ab791SAndreas Gruenbacher 	clear_bit(CONSIDER_RESYNC, &device->flags);
4531b411b363SPhilipp Reisner 	ns.peer = peer_state.role;
4532b411b363SPhilipp Reisner 	ns.pdsk = real_peer_disk;
4533b411b363SPhilipp Reisner 	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
45344ac4aadaSLars Ellenberg 	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
4535b30ab791SAndreas Gruenbacher 		ns.disk = device->new_state_tmp.disk;
45364ac4aadaSLars Ellenberg 	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
4537b30ab791SAndreas Gruenbacher 	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4538b30ab791SAndreas Gruenbacher 	    test_bit(NEW_CUR_UUID, &device->flags)) {
45398554df1cSAndreas Gruenbacher 		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
4540481c6f50SPhilipp Reisner 		   for temporary network outages! */
45410500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
4542d0180171SAndreas Gruenbacher 		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
45439f4fe9adSAndreas Gruenbacher 		tl_clear(peer_device->connection);
4544b30ab791SAndreas Gruenbacher 		drbd_uuid_new_current(device);
4545b30ab791SAndreas Gruenbacher 		clear_bit(NEW_CUR_UUID, &device->flags);
45469f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
454782bc0194SAndreas Gruenbacher 		return -EIO;
4548481c6f50SPhilipp Reisner 	}
4549b30ab791SAndreas Gruenbacher 	rv = _drbd_set_state(device, ns, cs_flags, NULL);
4550b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
45510500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
4552b411b363SPhilipp Reisner 
4553b411b363SPhilipp Reisner 	if (rv < SS_SUCCESS) {
45549f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
455582bc0194SAndreas Gruenbacher 		return -EIO;
4556b411b363SPhilipp Reisner 	}
4557b411b363SPhilipp Reisner 
45584ac4aadaSLars Ellenberg 	if (os.conn > C_WF_REPORT_PARAMS) {
45594ac4aadaSLars Ellenberg 		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
4560b411b363SPhilipp Reisner 		    peer_state.disk != D_NEGOTIATING ) {
4561b411b363SPhilipp Reisner 			/* we want resync, peer has not yet decided to sync... */
4562b411b363SPhilipp Reisner 			/* Nowadays only used when forcing a node into primary role and
4563b411b363SPhilipp Reisner 			   setting its disk to UpToDate with that */
456469a22773SAndreas Gruenbacher 			drbd_send_uuids(peer_device);
456569a22773SAndreas Gruenbacher 			drbd_send_current_state(peer_device);
4566b411b363SPhilipp Reisner 		}
4567b411b363SPhilipp Reisner 	}
4568b411b363SPhilipp Reisner 
4569b30ab791SAndreas Gruenbacher 	clear_bit(DISCARD_MY_DATA, &device->flags);
4570b411b363SPhilipp Reisner 
4571b30ab791SAndreas Gruenbacher 	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
4572b411b363SPhilipp Reisner 
457382bc0194SAndreas Gruenbacher 	return 0;
4574b411b363SPhilipp Reisner }
4575b411b363SPhilipp Reisner 
/*
 * receive_sync_uuid() - process a P_SYNC_UUID packet from the peer
 *
 * Waits until the device has settled into a state where the sync UUID may
 * be applied (C_WF_SYNC_UUID / C_BEHIND, or the connection/disk dropped),
 * then adopts the peer's UUID as our current one, clears the bitmap UUID,
 * and starts resync as sync target.  Returns 0, or -EIO if no peer device
 * exists for this volume.
 */
4576bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4577b411b363SPhilipp Reisner {
45789f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4579b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4580e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
45814a76b161SAndreas Gruenbacher 
45829f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
45839f4fe9adSAndreas Gruenbacher 	if (!peer_device)
45844a76b161SAndreas Gruenbacher 		return -EIO;
45859f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4586b411b363SPhilipp Reisner 
4587b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4588b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4589b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4590b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4591b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4592b411b363SPhilipp Reisner 
45930b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4594b411b363SPhilipp Reisner 
4595b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4596b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4597b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4598b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4599b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4600b411b363SPhilipp Reisner 
4601b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4602b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4603b411b363SPhilipp Reisner 
4604b30ab791SAndreas Gruenbacher 		put_ldev(device);
4605b411b363SPhilipp Reisner 	} else
4606d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4607b411b363SPhilipp Reisner 
460882bc0194SAndreas Gruenbacher 	return 0;
4609b411b363SPhilipp Reisner }
4610b411b363SPhilipp Reisner 
46119b48ff07SLee Jones /*
46122c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46132c46407dSAndreas Gruenbacher  *
 * Receive one chunk of an uncompressed bitmap: up to one socket buffer of
 * little-endian words, merged into the local bitmap at @c->word_offset via
 * drbd_bm_merge_lel().  @c is advanced accordingly.
 *
46142c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46152c46407dSAndreas Gruenbacher  * code upon failure.
46162c46407dSAndreas Gruenbacher  */
46172c46407dSAndreas Gruenbacher static int
461869a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4619e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4620b411b363SPhilipp Reisner {
462150d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
462269a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4623e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
462450d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4625e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46262c46407dSAndreas Gruenbacher 	int err;
4627b411b363SPhilipp Reisner 
	/* the sender must have sized the payload exactly like we compute it */
462850d0b1adSAndreas Gruenbacher 	if (want != size) {
462969a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46302c46407dSAndreas Gruenbacher 		return -EIO;
4631b411b363SPhilipp Reisner 	}
4632b411b363SPhilipp Reisner 	if (want == 0)
46332c46407dSAndreas Gruenbacher 		return 0;
463469a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
463582bc0194SAndreas Gruenbacher 	if (err)
46362c46407dSAndreas Gruenbacher 		return err;
4637b411b363SPhilipp Reisner 
463869a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4639b411b363SPhilipp Reisner 
4640b411b363SPhilipp Reisner 	c->word_offset += num_words;
4641b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4642b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4643b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4644b411b363SPhilipp Reisner 
46452c46407dSAndreas Gruenbacher 	return 1;
4646b411b363SPhilipp Reisner }
4647b411b363SPhilipp Reisner 
4648a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4649a02d1240SAndreas Gruenbacher {
4650a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4651a02d1240SAndreas Gruenbacher }
4652a02d1240SAndreas Gruenbacher 
4653a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4654a02d1240SAndreas Gruenbacher {
4655a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4656a02d1240SAndreas Gruenbacher }
4657a02d1240SAndreas Gruenbacher 
4658a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4659a02d1240SAndreas Gruenbacher {
4660a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4661a02d1240SAndreas Gruenbacher }
4662a02d1240SAndreas Gruenbacher 
46639b48ff07SLee Jones /*
46642c46407dSAndreas Gruenbacher  * recv_bm_rle_bits
46652c46407dSAndreas Gruenbacher  *
 * Decode a VLI/run-length encoded chunk of the bitmap: runs alternate
 * between clear and set bits (starting polarity taken from the packet's
 * encoding byte), and the "set" runs are applied to the local bitmap with
 * _drbd_bm_set_bits().  @c->bit_offset tracks progress across chunks.
 *
46662c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46672c46407dSAndreas Gruenbacher  * code upon failure.
46682c46407dSAndreas Gruenbacher  */
46692c46407dSAndreas Gruenbacher static int
467069a22773SAndreas Gruenbacher recv_bm_rle_bits(struct drbd_peer_device *peer_device,
4671b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4672c6d25cfeSPhilipp Reisner 		 struct bm_xfer_ctx *c,
4673c6d25cfeSPhilipp Reisner 		 unsigned int len)
4674b411b363SPhilipp Reisner {
4675b411b363SPhilipp Reisner 	struct bitstream bs;
4676b411b363SPhilipp Reisner 	u64 look_ahead;
4677b411b363SPhilipp Reisner 	u64 rl;
4678b411b363SPhilipp Reisner 	u64 tmp;
4679b411b363SPhilipp Reisner 	unsigned long s = c->bit_offset;
4680b411b363SPhilipp Reisner 	unsigned long e;
4681a02d1240SAndreas Gruenbacher 	int toggle = dcbp_get_start(p);
4682b411b363SPhilipp Reisner 	int have;
4683b411b363SPhilipp Reisner 	int bits;
4684b411b363SPhilipp Reisner 
4685a02d1240SAndreas Gruenbacher 	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
4686b411b363SPhilipp Reisner 
	/* prime the 64-bit look-ahead window from the bit stream */
4687b411b363SPhilipp Reisner 	bits = bitstream_get_bits(&bs, &look_ahead, 64);
4688b411b363SPhilipp Reisner 	if (bits < 0)
46892c46407dSAndreas Gruenbacher 		return -EIO;
4690b411b363SPhilipp Reisner 
4691b411b363SPhilipp Reisner 	for (have = bits; have > 0; s += rl, toggle = !toggle) {
4692b411b363SPhilipp Reisner 		bits = vli_decode_bits(&rl, look_ahead);
4693b411b363SPhilipp Reisner 		if (bits <= 0)
46942c46407dSAndreas Gruenbacher 			return -EIO;
4695b411b363SPhilipp Reisner 
4696b411b363SPhilipp Reisner 		if (toggle) {
4697b411b363SPhilipp Reisner 			e = s + rl -1;
4698b411b363SPhilipp Reisner 			if (e >= c->bm_bits) {
469969a22773SAndreas Gruenbacher 				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
47002c46407dSAndreas Gruenbacher 				return -EIO;
4701b411b363SPhilipp Reisner 			}
470269a22773SAndreas Gruenbacher 			_drbd_bm_set_bits(peer_device->device, s, e);
4703b411b363SPhilipp Reisner 		}
4704b411b363SPhilipp Reisner 
		/* a code word must never span more bits than we have buffered */
4705b411b363SPhilipp Reisner 		if (have < bits) {
470669a22773SAndreas Gruenbacher 			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4707b411b363SPhilipp Reisner 				have, bits, look_ahead,
4708b411b363SPhilipp Reisner 				(unsigned int)(bs.cur.b - p->code),
4709b411b363SPhilipp Reisner 				(unsigned int)bs.buf_len);
47102c46407dSAndreas Gruenbacher 			return -EIO;
4711b411b363SPhilipp Reisner 		}
4712d2da5b0cSLars Ellenberg 		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4713d2da5b0cSLars Ellenberg 		if (likely(bits < 64))
4714b411b363SPhilipp Reisner 			look_ahead >>= bits;
4715d2da5b0cSLars Ellenberg 		else
4716d2da5b0cSLars Ellenberg 			look_ahead = 0;
4717b411b363SPhilipp Reisner 		have -= bits;
4718b411b363SPhilipp Reisner 
		/* refill the look-ahead window up to 64 bits */
4719b411b363SPhilipp Reisner 		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4720b411b363SPhilipp Reisner 		if (bits < 0)
47212c46407dSAndreas Gruenbacher 			return -EIO;
4722b411b363SPhilipp Reisner 		look_ahead |= tmp << have;
4723b411b363SPhilipp Reisner 		have += bits;
4724b411b363SPhilipp Reisner 	}
4725b411b363SPhilipp Reisner 
4726b411b363SPhilipp Reisner 	c->bit_offset = s;
4727b411b363SPhilipp Reisner 	bm_xfer_ctx_bit_to_word_offset(c);
4728b411b363SPhilipp Reisner 
47292c46407dSAndreas Gruenbacher 	return (s != c->bm_bits);
4730b411b363SPhilipp Reisner }
4731b411b363SPhilipp Reisner 
47329b48ff07SLee Jones /*
47332c46407dSAndreas Gruenbacher  * decode_bitmap_c
47342c46407dSAndreas Gruenbacher  *
47352c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47362c46407dSAndreas Gruenbacher  * code upon failure.
47372c46407dSAndreas Gruenbacher  */
47382c46407dSAndreas Gruenbacher static int
473969a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4740b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4741c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4742c6d25cfeSPhilipp Reisner 		unsigned int len)
4743b411b363SPhilipp Reisner {
4744a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
474569a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4746b411b363SPhilipp Reisner 
4747b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4748b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4749b411b363SPhilipp Reisner 	 * during all our tests. */
4750b411b363SPhilipp Reisner 
475169a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
475269a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
47532c46407dSAndreas Gruenbacher 	return -EIO;
4754b411b363SPhilipp Reisner }
4755b411b363SPhilipp Reisner 
/*
 * INFO_bm_xfer_stats() - log how well a bitmap transfer compressed
 *
 * Compares the bytes actually transferred (c->bytes[]) with what a plain,
 * uncompressed transfer would have cost, and logs the savings in permille.
 * Nothing is logged if the compressed transfer was not actually smaller.
 */
4756b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4757b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4758b411b363SPhilipp Reisner {
4759b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4760a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
476150d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
476250d0b1adSAndreas Gruenbacher 	unsigned int plain =
476350d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
476450d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
476550d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
476650d0b1adSAndreas Gruenbacher 	unsigned int r;
4767b411b363SPhilipp Reisner 
4768b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4769b411b363SPhilipp Reisner 	if (total == 0)
4770b411b363SPhilipp Reisner 		return;
4771b411b363SPhilipp Reisner 
4772b411b363SPhilipp Reisner 	/* don't report if not compressed */
4773b411b363SPhilipp Reisner 	if (total >= plain)
4774b411b363SPhilipp Reisner 		return;
4775b411b363SPhilipp Reisner 
4776b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4777b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4778b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4779b411b363SPhilipp Reisner 
4780b411b363SPhilipp Reisner 	if (r > 1000)
4781b411b363SPhilipp Reisner 		r = 1000;
4782b411b363SPhilipp Reisner 
	/* r is now the savings in permille (0..1000) */
4783b411b363SPhilipp Reisner 	r = 1000 - r;
4784d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4785b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4786b411b363SPhilipp Reisner 			direction,
4787b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4788b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4789b411b363SPhilipp Reisner 			total, r/10, r % 10);
4790b411b363SPhilipp Reisner }
4791b411b363SPhilipp Reisner 
4792b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4793b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4794b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4795b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4796b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4797b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4798b411b363SPhilipp Reisner 
4799b411b363SPhilipp Reisner    returns 0 on failure, 1 if we successfully received it. */
4800bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4801b411b363SPhilipp Reisner {
48029f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4803b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4804b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
48052c46407dSAndreas Gruenbacher 	int err;
48064a76b161SAndreas Gruenbacher 
48079f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
48089f4fe9adSAndreas Gruenbacher 	if (!peer_device)
48094a76b161SAndreas Gruenbacher 		return -EIO;
48109f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4811b411b363SPhilipp Reisner 
4812b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
481320ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
481420ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4815b411b363SPhilipp Reisner 
4816b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4817b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4818b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4819b411b363SPhilipp Reisner 	};
4820b411b363SPhilipp Reisner 
48212c46407dSAndreas Gruenbacher 	for(;;) {
4822e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
482369a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4824e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4825b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4826b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4827e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4828b411b363SPhilipp Reisner 
4829bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4830d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
483182bc0194SAndreas Gruenbacher 				err = -EIO;
4832b411b363SPhilipp Reisner 				goto out;
4833b411b363SPhilipp Reisner 			}
4834e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4835d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
483682bc0194SAndreas Gruenbacher 				err = -EIO;
483778fcbdaeSAndreas Gruenbacher 				goto out;
4838b411b363SPhilipp Reisner 			}
48399f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4840e658983aSAndreas Gruenbacher 			if (err)
4841e658983aSAndreas Gruenbacher 			       goto out;
484269a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4843b411b363SPhilipp Reisner 		} else {
4844d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
484582bc0194SAndreas Gruenbacher 			err = -EIO;
4846b411b363SPhilipp Reisner 			goto out;
4847b411b363SPhilipp Reisner 		}
4848b411b363SPhilipp Reisner 
4849e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4850bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4851b411b363SPhilipp Reisner 
48522c46407dSAndreas Gruenbacher 		if (err <= 0) {
48532c46407dSAndreas Gruenbacher 			if (err < 0)
48542c46407dSAndreas Gruenbacher 				goto out;
4855b411b363SPhilipp Reisner 			break;
48562c46407dSAndreas Gruenbacher 		}
48579f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
485882bc0194SAndreas Gruenbacher 		if (err)
4859b411b363SPhilipp Reisner 			goto out;
48602c46407dSAndreas Gruenbacher 	}
4861b411b363SPhilipp Reisner 
4862b30ab791SAndreas Gruenbacher 	INFO_bm_xfer_stats(device, "receive", &c);
4863b411b363SPhilipp Reisner 
4864b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4865de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4866de1f8e4aSAndreas Gruenbacher 
4867b30ab791SAndreas Gruenbacher 		err = drbd_send_bitmap(device);
486882bc0194SAndreas Gruenbacher 		if (err)
4869b411b363SPhilipp Reisner 			goto out;
4870b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4871b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
48720b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4873b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4874b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4875b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4876d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4877b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4878b411b363SPhilipp Reisner 	}
487982bc0194SAndreas Gruenbacher 	err = 0;
4880b411b363SPhilipp Reisner 
4881b411b363SPhilipp Reisner  out:
4882b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4883b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4884b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
488582bc0194SAndreas Gruenbacher 	return err;
4886b411b363SPhilipp Reisner }
4887b411b363SPhilipp Reisner 
4888bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4889b411b363SPhilipp Reisner {
48901ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4891e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4892b411b363SPhilipp Reisner 
4893bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4894b411b363SPhilipp Reisner }
4895b411b363SPhilipp Reisner 
4896bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4897b411b363SPhilipp Reisner {
4898b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4899b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4900ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
490182bc0194SAndreas Gruenbacher 	return 0;
4902b411b363SPhilipp Reisner }
4903b411b363SPhilipp Reisner 
4904bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
490573a01a18SPhilipp Reisner {
49069f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4907b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4908e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49094a76b161SAndreas Gruenbacher 
49109f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49119f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49124a76b161SAndreas Gruenbacher 		return -EIO;
49139f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
491473a01a18SPhilipp Reisner 
4915b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4916f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4917f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4918f735e363SLars Ellenberg 	case C_BEHIND:
4919f735e363SLars Ellenberg 			break;
4920f735e363SLars Ellenberg 	default:
4921d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4922b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4923f735e363SLars Ellenberg 	}
4924f735e363SLars Ellenberg 
4925b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
492673a01a18SPhilipp Reisner 
492782bc0194SAndreas Gruenbacher 	return 0;
492873a01a18SPhilipp Reisner }
492973a01a18SPhilipp Reisner 
/*
 * Handle P_RS_DEALLOCATED during resync: the sync source tells us this
 * block is deallocated on its side, so instead of shipping the data we
 * submit a local REQ_OP_WRITE_ZEROES for the same range.
 *
 * Returns 0 on success (including the fallback path that answers with
 * P_NEG_ACK when we have no local disk or submission failed),
 * -EIO when the volume does not exist, -ENOMEM on allocation failure.
 */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* This packet answers one of our resync requests; it is no longer pending. */
	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;
		const enum req_op op = REQ_OP_WRITE_ZEROES;

		/* Payload size 0: the zeroing request carries no data pages. */
		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		/* Queue on sync_ee before submission so completion can find it. */
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		/* NOTE(review): rs_sect_ev is bumped by pi->size (remaining
		 * payload, typically 0 after the header was consumed) while
		 * rs_sect_in below uses the block size — asymmetry looks
		 * intentional upstream, but confirm before changing. */
		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(device, peer_req, op,
					       DRBD_FAULT_RS_WR);

		if (err) {
			/* Undo the queuing and fall through to the NEG_ACK
			 * path; err is reset so the connection survives. */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
		/* No local disk (or, via the goto above, submission failed):
		 * close out the resync extent and tell the peer we could not
		 * take the block. */
	fail:
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	/* Account the range as received for resync throttling purposes. */
	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
4996700ca8c0SPhilipp Reisner 
/* Per-packet-type dispatch entry for the data socket (see drbd_cmd_handler). */
struct data_cmd {
	int expect_payload;	/* nonzero: packet may carry payload beyond pkt_size */
	unsigned int pkt_size;	/* fixed sub-header size read before calling fn (0: fn reads it all) */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler; 0 on success, negative on error */
};
5002b411b363SPhilipp Reisner 
/*
 * Dispatch table for the data socket, indexed by packet command.
 * Unset slots (fn == NULL) are rejected in drbdd(). Several commands
 * deliberately share a handler (e.g. all read-request variants go
 * through receive_DataRequest, P_TRIM/P_ZEROES through receive_Data).
 */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
};
503302918be2SPhilipp Reisner 
/*
 * Main receive loop of the receiver thread: read one packet header at a
 * time, validate it against drbd_cmd_handler[], pull in the fixed
 * sub-header, and dispatch. Any protocol violation or handler failure
 * drops the connection into C_PROTOCOL_ERROR.
 */
static void drbdd(struct drbd_connection *connection)
{
	struct packet_info pi;
	size_t shs; /* sub header size */
	int err;

	while (get_t_state(&connection->receiver) == RUNNING) {
		struct data_cmd const *cmd;

		drbd_thread_current_set_cpu(&connection->receiver);
		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
		if (drbd_recv_header_maybe_unplug(connection, &pi))
			goto err_out;

		/* Only the address is taken here; short-circuit evaluation in
		 * the check below guarantees cmd->fn is never read when
		 * pi.cmd is out of table bounds. */
		cmd = &drbd_cmd_handler[pi.cmd];
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
				 cmdname(pi.cmd), pi.cmd);
			goto err_out;
		}

		shs = cmd->pkt_size;
		/* With the WSAME feature, P_SIZES grew by a struct o_qlim. */
		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
			shs += sizeof(struct o_qlim);
		if (pi.size > shs && !cmd->expect_payload) {
			drbd_err(connection, "No payload expected %s l:%d\n",
				 cmdname(pi.cmd), pi.size);
			goto err_out;
		}
		if (pi.size < shs) {
			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
				 cmdname(pi.cmd), (int)shs, pi.size);
			goto err_out;
		}

		/* Read the fixed sub-header; the handler sees only the
		 * remaining payload size in pi.size. */
		if (shs) {
			update_receiver_timing_details(connection, drbd_recv_all_warn);
			err = drbd_recv_all_warn(connection, pi.data, shs);
			if (err)
				goto err_out;
			pi.size -= shs;
		}

		update_receiver_timing_details(connection, cmd->fn);
		err = cmd->fn(connection, &pi);
		if (err) {
			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
				 cmdname(pi.cmd), err, pi.size);
			goto err_out;
		}
	}
	return;

    err_out:
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}
5090b411b363SPhilipp Reisner 
/*
 * Tear down a lost connection: stop the ack machinery, free the sockets,
 * run per-volume cleanup, reset epoch state and transition the connection
 * towards C_UNCONNECTED (or C_STANDALONE if the admin asked to disconnect).
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* kref_get pins each device while we drop the RCU read lock to run
	 * the (sleeping) per-volume cleanup, then re-acquire it to continue
	 * the IDR walk. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we were Primary and the peer's disk state is unknown, try to
	 * fence/outdate the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5147360cc740SPhilipp Reisner 
/*
 * Per-volume cleanup after connection loss: wait for in-flight peer
 * requests, cancel resync bookkeeping, flush the sender workqueue,
 * clear the transfer log (unless suspended) and write out the bitmap.
 * Called from conn_disconnect() with a reference held on the device.
 * Always returns 0.
 */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* Run the resync timer function once more by hand after stopping it,
	 * so its final bookkeeping is not lost. */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* Persist the bitmap so out-of-sync information survives a crash
	 * while disconnected. */
	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
5233b411b363SPhilipp Reisner 
5234b411b363SPhilipp Reisner /*
5235b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5236b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5237b411b363SPhilipp Reisner  *
5238b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5239b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5240b411b363SPhilipp Reisner  *
5241b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5242b411b363SPhilipp Reisner  */
5243bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5244b411b363SPhilipp Reisner {
52459f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
52469f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5247b411b363SPhilipp Reisner 
5248bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5249bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
52509f5bdc33SAndreas Gruenbacher 	if (!p)
5251e8d17b01SAndreas Gruenbacher 		return -EIO;
5252b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5253b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5254b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
525520c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5256bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5257b411b363SPhilipp Reisner }
5258b411b363SPhilipp Reisner 
5259b411b363SPhilipp Reisner /*
5260b411b363SPhilipp Reisner  * return values:
5261b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5262b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5263b411b363SPhilipp Reisner  *  -1 peer talks different language,
5264b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5265b411b363SPhilipp Reisner  */
/*
 * Perform the feature handshake: send our P_CONNECTION_FEATURES, read the
 * peer's answer, and negotiate agreed_pro_version / agreed_features.
 *
 * Return values:
 *   1  handshake successful
 *   0  transient failure (network error), caller may retry
 *  -1  incompatible peer, do not retry
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert from network byte order in place. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* Very old peers send only protocol_min; treat max==0 as "min only". */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest version and the feature intersection. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	/* Note: nested ternary — the last line yields "" when any feature bit
	 * is set but WZEROES is not, and " none" when no features agreed. */
	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5331b411b363SPhilipp Reisner 
5332b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
5333bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection)
5334b411b363SPhilipp Reisner {
53351ec861ebSAndreas Gruenbacher 	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
53361ec861ebSAndreas Gruenbacher 	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
5337b10d96cbSJohannes Thoma 	return -1;
5338b411b363SPhilipp Reisner }
5339b411b363SPhilipp Reisner #else
5340b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5341b10d96cbSJohannes Thoma 
5342b10d96cbSJohannes Thoma /* Return value:
5343b10d96cbSJohannes Thoma 	1 - auth succeeded,
5344b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5345b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5346b10d96cbSJohannes Thoma */
5347b10d96cbSJohannes Thoma 
5348bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection)
5349b411b363SPhilipp Reisner {
53509f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
5351b411b363SPhilipp Reisner 	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
5352b411b363SPhilipp Reisner 	char *response = NULL;
5353b411b363SPhilipp Reisner 	char *right_response = NULL;
5354b411b363SPhilipp Reisner 	char *peers_ch = NULL;
535544ed167dSPhilipp Reisner 	unsigned int key_len;
535644ed167dSPhilipp Reisner 	char secret[SHARED_SECRET_MAX]; /* 64 byte */
5357b411b363SPhilipp Reisner 	unsigned int resp_size;
535877ce56e2SArnd Bergmann 	struct shash_desc *desc;
535977351055SPhilipp Reisner 	struct packet_info pi;
536044ed167dSPhilipp Reisner 	struct net_conf *nc;
536169bc7bc3SAndreas Gruenbacher 	int err, rv;
5362b411b363SPhilipp Reisner 
53639f5bdc33SAndreas Gruenbacher 	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */
53649f5bdc33SAndreas Gruenbacher 
536544ed167dSPhilipp Reisner 	rcu_read_lock();
5366bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
536744ed167dSPhilipp Reisner 	key_len = strlen(nc->shared_secret);
536844ed167dSPhilipp Reisner 	memcpy(secret, nc->shared_secret, key_len);
536944ed167dSPhilipp Reisner 	rcu_read_unlock();
537044ed167dSPhilipp Reisner 
537177ce56e2SArnd Bergmann 	desc = kmalloc(sizeof(struct shash_desc) +
537277ce56e2SArnd Bergmann 		       crypto_shash_descsize(connection->cram_hmac_tfm),
537377ce56e2SArnd Bergmann 		       GFP_KERNEL);
537477ce56e2SArnd Bergmann 	if (!desc) {
537577ce56e2SArnd Bergmann 		rv = -1;
537677ce56e2SArnd Bergmann 		goto fail;
537777ce56e2SArnd Bergmann 	}
53789534d671SHerbert Xu 	desc->tfm = connection->cram_hmac_tfm;
5379b411b363SPhilipp Reisner 
53809534d671SHerbert Xu 	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
5381b411b363SPhilipp Reisner 	if (rv) {
53829534d671SHerbert Xu 		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
5383b10d96cbSJohannes Thoma 		rv = -1;
5384b411b363SPhilipp Reisner 		goto fail;
5385b411b363SPhilipp Reisner 	}
5386b411b363SPhilipp Reisner 
5387b411b363SPhilipp Reisner 	get_random_bytes(my_challenge, CHALLENGE_LEN);
5388b411b363SPhilipp Reisner 
5389bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5390bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
53919f5bdc33SAndreas Gruenbacher 		rv = 0;
53929f5bdc33SAndreas Gruenbacher 		goto fail;
53939f5bdc33SAndreas Gruenbacher 	}
5394bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
53959f5bdc33SAndreas Gruenbacher 				my_challenge, CHALLENGE_LEN);
5396b411b363SPhilipp Reisner 	if (!rv)
5397b411b363SPhilipp Reisner 		goto fail;
5398b411b363SPhilipp Reisner 
5399bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
540069bc7bc3SAndreas Gruenbacher 	if (err) {
5401b411b363SPhilipp Reisner 		rv = 0;
5402b411b363SPhilipp Reisner 		goto fail;
5403b411b363SPhilipp Reisner 	}
5404b411b363SPhilipp Reisner 
540577351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_CHALLENGE) {
54061ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
540777351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
54089049ccd4SLars Ellenberg 		rv = -1;
5409b411b363SPhilipp Reisner 		goto fail;
5410b411b363SPhilipp Reisner 	}
5411b411b363SPhilipp Reisner 
541277351055SPhilipp Reisner 	if (pi.size > CHALLENGE_LEN * 2) {
54131ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthChallenge payload too big.\n");
5414b10d96cbSJohannes Thoma 		rv = -1;
5415b411b363SPhilipp Reisner 		goto fail;
5416b411b363SPhilipp Reisner 	}
5417b411b363SPhilipp Reisner 
541867cca286SPhilipp Reisner 	if (pi.size < CHALLENGE_LEN) {
541967cca286SPhilipp Reisner 		drbd_err(connection, "AuthChallenge payload too small.\n");
542067cca286SPhilipp Reisner 		rv = -1;
542167cca286SPhilipp Reisner 		goto fail;
542267cca286SPhilipp Reisner 	}
542367cca286SPhilipp Reisner 
542477351055SPhilipp Reisner 	peers_ch = kmalloc(pi.size, GFP_NOIO);
54258404e191SZhen Lei 	if (!peers_ch) {
5426b10d96cbSJohannes Thoma 		rv = -1;
5427b411b363SPhilipp Reisner 		goto fail;
5428b411b363SPhilipp Reisner 	}
5429b411b363SPhilipp Reisner 
5430bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
5431a5c31904SAndreas Gruenbacher 	if (err) {
5432b411b363SPhilipp Reisner 		rv = 0;
5433b411b363SPhilipp Reisner 		goto fail;
5434b411b363SPhilipp Reisner 	}
5435b411b363SPhilipp Reisner 
543667cca286SPhilipp Reisner 	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
543767cca286SPhilipp Reisner 		drbd_err(connection, "Peer presented the same challenge!\n");
543867cca286SPhilipp Reisner 		rv = -1;
543967cca286SPhilipp Reisner 		goto fail;
544067cca286SPhilipp Reisner 	}
544167cca286SPhilipp Reisner 
54429534d671SHerbert Xu 	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
5443b411b363SPhilipp Reisner 	response = kmalloc(resp_size, GFP_NOIO);
54448404e191SZhen Lei 	if (!response) {
5445b10d96cbSJohannes Thoma 		rv = -1;
5446b411b363SPhilipp Reisner 		goto fail;
5447b411b363SPhilipp Reisner 	}
5448b411b363SPhilipp Reisner 
54499534d671SHerbert Xu 	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
5450b411b363SPhilipp Reisner 	if (rv) {
54511ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5452b10d96cbSJohannes Thoma 		rv = -1;
5453b411b363SPhilipp Reisner 		goto fail;
5454b411b363SPhilipp Reisner 	}
5455b411b363SPhilipp Reisner 
5456bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock)) {
54579f5bdc33SAndreas Gruenbacher 		rv = 0;
54589f5bdc33SAndreas Gruenbacher 		goto fail;
54599f5bdc33SAndreas Gruenbacher 	}
5460bde89a9eSAndreas Gruenbacher 	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
54619f5bdc33SAndreas Gruenbacher 				response, resp_size);
5462b411b363SPhilipp Reisner 	if (!rv)
5463b411b363SPhilipp Reisner 		goto fail;
5464b411b363SPhilipp Reisner 
5465bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
546669bc7bc3SAndreas Gruenbacher 	if (err) {
5467b411b363SPhilipp Reisner 		rv = 0;
5468b411b363SPhilipp Reisner 		goto fail;
5469b411b363SPhilipp Reisner 	}
5470b411b363SPhilipp Reisner 
547177351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_RESPONSE) {
54721ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
547377351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
5474b411b363SPhilipp Reisner 		rv = 0;
5475b411b363SPhilipp Reisner 		goto fail;
5476b411b363SPhilipp Reisner 	}
5477b411b363SPhilipp Reisner 
547877351055SPhilipp Reisner 	if (pi.size != resp_size) {
54791ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
5480b411b363SPhilipp Reisner 		rv = 0;
5481b411b363SPhilipp Reisner 		goto fail;
5482b411b363SPhilipp Reisner 	}
5483b411b363SPhilipp Reisner 
5484bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, response , resp_size);
5485a5c31904SAndreas Gruenbacher 	if (err) {
5486b411b363SPhilipp Reisner 		rv = 0;
5487b411b363SPhilipp Reisner 		goto fail;
5488b411b363SPhilipp Reisner 	}
5489b411b363SPhilipp Reisner 
5490b411b363SPhilipp Reisner 	right_response = kmalloc(resp_size, GFP_NOIO);
54918404e191SZhen Lei 	if (!right_response) {
5492b10d96cbSJohannes Thoma 		rv = -1;
5493b411b363SPhilipp Reisner 		goto fail;
5494b411b363SPhilipp Reisner 	}
5495b411b363SPhilipp Reisner 
54969534d671SHerbert Xu 	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
54979534d671SHerbert Xu 				 right_response);
5498b411b363SPhilipp Reisner 	if (rv) {
54991ec861ebSAndreas Gruenbacher 		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
5500b10d96cbSJohannes Thoma 		rv = -1;
5501b411b363SPhilipp Reisner 		goto fail;
5502b411b363SPhilipp Reisner 	}
5503b411b363SPhilipp Reisner 
5504b411b363SPhilipp Reisner 	rv = !memcmp(response, right_response, resp_size);
5505b411b363SPhilipp Reisner 
5506b411b363SPhilipp Reisner 	if (rv)
55071ec861ebSAndreas Gruenbacher 		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
550844ed167dSPhilipp Reisner 		     resp_size);
5509b10d96cbSJohannes Thoma 	else
5510b10d96cbSJohannes Thoma 		rv = -1;
5511b411b363SPhilipp Reisner 
5512b411b363SPhilipp Reisner  fail:
5513b411b363SPhilipp Reisner 	kfree(peers_ch);
5514b411b363SPhilipp Reisner 	kfree(response);
5515b411b363SPhilipp Reisner 	kfree(right_response);
551677ce56e2SArnd Bergmann 	if (desc) {
55179534d671SHerbert Xu 		shash_desc_zero(desc);
551877ce56e2SArnd Bergmann 		kfree(desc);
551977ce56e2SArnd Bergmann 	}
5520b411b363SPhilipp Reisner 
5521b411b363SPhilipp Reisner 	return rv;
5522b411b363SPhilipp Reisner }
5523b411b363SPhilipp Reisner #endif
5524b411b363SPhilipp Reisner 
55258fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5526b411b363SPhilipp Reisner {
5527bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5528b411b363SPhilipp Reisner 	int h;
5529b411b363SPhilipp Reisner 
55301ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5531b411b363SPhilipp Reisner 
5532b411b363SPhilipp Reisner 	do {
5533bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5534b411b363SPhilipp Reisner 		if (h == 0) {
5535bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
553620ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5537b411b363SPhilipp Reisner 		}
5538b411b363SPhilipp Reisner 		if (h == -1) {
55391ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5540bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5541b411b363SPhilipp Reisner 		}
5542b411b363SPhilipp Reisner 	} while (h == 0);
5543b411b363SPhilipp Reisner 
5544c51a0ef3SLars Ellenberg 	if (h > 0) {
5545c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5546bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5547c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5548c51a0ef3SLars Ellenberg 	}
5549b411b363SPhilipp Reisner 
5550bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5551b411b363SPhilipp Reisner 
55521ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5553b411b363SPhilipp Reisner 	return 0;
5554b411b363SPhilipp Reisner }
5555b411b363SPhilipp Reisner 
5556b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5557b411b363SPhilipp Reisner 
5558bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5559b411b363SPhilipp Reisner {
5560e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5561b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5562b411b363SPhilipp Reisner 
5563b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5564bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5565b411b363SPhilipp Reisner 	} else {
5566bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
55671ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5568fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5569fc3b10a4SPhilipp Reisner 	}
5570bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5571e4f78edeSPhilipp Reisner 
55722735a594SAndreas Gruenbacher 	return 0;
5573fc3b10a4SPhilipp Reisner }
5574e4f78edeSPhilipp Reisner 
5575bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5576e4f78edeSPhilipp Reisner {
55779f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5578b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5579e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5580e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5581e4f78edeSPhilipp Reisner 
55829f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
55839f4fe9adSAndreas Gruenbacher 	if (!peer_device)
55842735a594SAndreas Gruenbacher 		return -EIO;
55859f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
55861952e916SAndreas Gruenbacher 
5587bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
55880b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5589bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
55904d0fc3fdSPhilipp Reisner 	}
55914d0fc3fdSPhilipp Reisner 
5592e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5593b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5594e4f78edeSPhilipp Reisner 	} else {
5595b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5596d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5597b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5598b411b363SPhilipp Reisner 	}
5599b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5600b411b363SPhilipp Reisner 
56012735a594SAndreas Gruenbacher 	return 0;
5602b411b363SPhilipp Reisner }
5603b411b363SPhilipp Reisner 
5604bde89a9eSAndreas Gruenbacher static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
5605b411b363SPhilipp Reisner {
5606bde89a9eSAndreas Gruenbacher 	return drbd_send_ping_ack(connection);
5607b411b363SPhilipp Reisner 
5608b411b363SPhilipp Reisner }
5609b411b363SPhilipp Reisner 
5610bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5611b411b363SPhilipp Reisner {
5612b411b363SPhilipp Reisner 	/* restore idle timeout */
5613bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5614bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5615bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5616b411b363SPhilipp Reisner 
56172735a594SAndreas Gruenbacher 	return 0;
5618b411b363SPhilipp Reisner }
5619b411b363SPhilipp Reisner 
5620bde89a9eSAndreas Gruenbacher static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
5621b411b363SPhilipp Reisner {
56229f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5623b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5624e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5625b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5626b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5627b411b363SPhilipp Reisner 
56289f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56299f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56302735a594SAndreas Gruenbacher 		return -EIO;
56319f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56321952e916SAndreas Gruenbacher 
56339f4fe9adSAndreas Gruenbacher 	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
5634b411b363SPhilipp Reisner 
563569a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5636b411b363SPhilipp Reisner 
5637b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
5638b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5639b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5640b411b363SPhilipp Reisner 		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
5641b30ab791SAndreas Gruenbacher 		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5642b30ab791SAndreas Gruenbacher 		put_ldev(device);
56431d53f09eSLars Ellenberg 	}
5644b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5645b30ab791SAndreas Gruenbacher 	atomic_add(blksize >> 9, &device->rs_sect_in);
5646b411b363SPhilipp Reisner 
56472735a594SAndreas Gruenbacher 	return 0;
5648b411b363SPhilipp Reisner }
5649b411b363SPhilipp Reisner 
5650bc9c5c41SAndreas Gruenbacher static int
5651b30ab791SAndreas Gruenbacher validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
5652bc9c5c41SAndreas Gruenbacher 			      struct rb_root *root, const char *func,
5653bc9c5c41SAndreas Gruenbacher 			      enum drbd_req_event what, bool missing_ok)
5654b411b363SPhilipp Reisner {
5655b411b363SPhilipp Reisner 	struct drbd_request *req;
5656b411b363SPhilipp Reisner 	struct bio_and_error m;
5657b411b363SPhilipp Reisner 
56580500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5659b30ab791SAndreas Gruenbacher 	req = find_request(device, root, id, sector, missing_ok, func);
5660b411b363SPhilipp Reisner 	if (unlikely(!req)) {
56610500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
566285997675SAndreas Gruenbacher 		return -EIO;
5663b411b363SPhilipp Reisner 	}
5664b411b363SPhilipp Reisner 	__req_mod(req, what, &m);
56650500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5666b411b363SPhilipp Reisner 
5667b411b363SPhilipp Reisner 	if (m.bio)
5668b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
566985997675SAndreas Gruenbacher 	return 0;
5670b411b363SPhilipp Reisner }
5671b411b363SPhilipp Reisner 
5672bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
5673b411b363SPhilipp Reisner {
56749f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5675b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5676e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5677b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5678b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5679b411b363SPhilipp Reisner 	enum drbd_req_event what;
5680b411b363SPhilipp Reisner 
56819f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56829f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56832735a594SAndreas Gruenbacher 		return -EIO;
56849f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56851952e916SAndreas Gruenbacher 
568669a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5687b411b363SPhilipp Reisner 
5688579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5689b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, blksize);
5690b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
56912735a594SAndreas Gruenbacher 		return 0;
5692b411b363SPhilipp Reisner 	}
5693e05e1e59SAndreas Gruenbacher 	switch (pi->cmd) {
5694b411b363SPhilipp Reisner 	case P_RS_WRITE_ACK:
56958554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER_AND_SIS;
5696b411b363SPhilipp Reisner 		break;
5697b411b363SPhilipp Reisner 	case P_WRITE_ACK:
56988554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER;
5699b411b363SPhilipp Reisner 		break;
5700b411b363SPhilipp Reisner 	case P_RECV_ACK:
57018554df1cSAndreas Gruenbacher 		what = RECV_ACKED_BY_PEER;
5702b411b363SPhilipp Reisner 		break;
5703d4dabbe2SLars Ellenberg 	case P_SUPERSEDED:
5704d4dabbe2SLars Ellenberg 		what = CONFLICT_RESOLVED;
57057be8da07SAndreas Gruenbacher 		break;
57067be8da07SAndreas Gruenbacher 	case P_RETRY_WRITE:
57077be8da07SAndreas Gruenbacher 		what = POSTPONE_WRITE;
5708b411b363SPhilipp Reisner 		break;
5709b411b363SPhilipp Reisner 	default:
57102735a594SAndreas Gruenbacher 		BUG();
5711b411b363SPhilipp Reisner 	}
5712b411b363SPhilipp Reisner 
5713b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5714b30ab791SAndreas Gruenbacher 					     &device->write_requests, __func__,
5715bc9c5c41SAndreas Gruenbacher 					     what, false);
5716b411b363SPhilipp Reisner }
5717b411b363SPhilipp Reisner 
5718bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
5719b411b363SPhilipp Reisner {
57209f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5721b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5722e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5723b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
57242deb8336SPhilipp Reisner 	int size = be32_to_cpu(p->blksize);
572585997675SAndreas Gruenbacher 	int err;
5726b411b363SPhilipp Reisner 
57279f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57289f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57292735a594SAndreas Gruenbacher 		return -EIO;
57309f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5731b411b363SPhilipp Reisner 
573269a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5733b411b363SPhilipp Reisner 
5734579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5735b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
5736b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, size);
57372735a594SAndreas Gruenbacher 		return 0;
5738b411b363SPhilipp Reisner 	}
57392deb8336SPhilipp Reisner 
5740b30ab791SAndreas Gruenbacher 	err = validate_req_change_req_state(device, p->block_id, sector,
5741b30ab791SAndreas Gruenbacher 					    &device->write_requests, __func__,
5742303d1448SPhilipp Reisner 					    NEG_ACKED, true);
574385997675SAndreas Gruenbacher 	if (err) {
57442deb8336SPhilipp Reisner 		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
57452deb8336SPhilipp Reisner 		   The master bio might already be completed, therefore the
5746c3afd8f5SAndreas Gruenbacher 		   request is no longer in the collision hash. */
57472deb8336SPhilipp Reisner 		/* In Protocol B we might already have got a P_RECV_ACK
57482deb8336SPhilipp Reisner 		   but then get a P_NEG_ACK afterwards. */
5749b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, sector, size);
57502deb8336SPhilipp Reisner 	}
57512735a594SAndreas Gruenbacher 	return 0;
5752b411b363SPhilipp Reisner }
5753b411b363SPhilipp Reisner 
5754bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5755b411b363SPhilipp Reisner {
57569f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5757b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5758e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5759b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5760b411b363SPhilipp Reisner 
57619f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57629f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57632735a594SAndreas Gruenbacher 		return -EIO;
57649f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
57651952e916SAndreas Gruenbacher 
576669a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
57677be8da07SAndreas Gruenbacher 
5768d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5769b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5770b411b363SPhilipp Reisner 
5771b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5772b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
57738554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5774b411b363SPhilipp Reisner }
5775b411b363SPhilipp Reisner 
5776bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
5777b411b363SPhilipp Reisner {
57789f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5779b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5780b411b363SPhilipp Reisner 	sector_t sector;
5781b411b363SPhilipp Reisner 	int size;
5782e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
57831952e916SAndreas Gruenbacher 
57849f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57859f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57862735a594SAndreas Gruenbacher 		return -EIO;
57879f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5788b411b363SPhilipp Reisner 
5789b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5790b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5791b411b363SPhilipp Reisner 
579269a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5793b411b363SPhilipp Reisner 
5794b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5795b411b363SPhilipp Reisner 
5796b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
5797b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5798e05e1e59SAndreas Gruenbacher 		switch (pi->cmd) {
5799d612d309SPhilipp Reisner 		case P_NEG_RS_DREPLY:
5800b30ab791SAndreas Gruenbacher 			drbd_rs_failed_io(device, sector, size);
58016327c911SGustavo A. R. Silva 			break;
5802d612d309SPhilipp Reisner 		case P_RS_CANCEL:
5803d612d309SPhilipp Reisner 			break;
5804d612d309SPhilipp Reisner 		default:
58052735a594SAndreas Gruenbacher 			BUG();
5806d612d309SPhilipp Reisner 		}
5807b30ab791SAndreas Gruenbacher 		put_ldev(device);
5808b411b363SPhilipp Reisner 	}
5809b411b363SPhilipp Reisner 
58102735a594SAndreas Gruenbacher 	return 0;
5811b411b363SPhilipp Reisner }
5812b411b363SPhilipp Reisner 
5813bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
5814b411b363SPhilipp Reisner {
5815e658983aSAndreas Gruenbacher 	struct p_barrier_ack *p = pi->data;
5816c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
58179ed57dcbSLars Ellenberg 	int vnr;
5818b411b363SPhilipp Reisner 
5819bde89a9eSAndreas Gruenbacher 	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
5820b411b363SPhilipp Reisner 
58219ed57dcbSLars Ellenberg 	rcu_read_lock();
5822c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5823c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5824c06ece6bSAndreas Gruenbacher 
5825b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_AHEAD &&
5826b30ab791SAndreas Gruenbacher 		    atomic_read(&device->ap_in_flight) == 0 &&
5827b30ab791SAndreas Gruenbacher 		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5828b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ;
5829b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
5830c4752ef1SPhilipp Reisner 		}
58319ed57dcbSLars Ellenberg 	}
58329ed57dcbSLars Ellenberg 	rcu_read_unlock();
5833c4752ef1SPhilipp Reisner 
58342735a594SAndreas Gruenbacher 	return 0;
5835b411b363SPhilipp Reisner }
5836b411b363SPhilipp Reisner 
5837bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
5838b411b363SPhilipp Reisner {
58399f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5840b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5841e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
584284b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
5843b411b363SPhilipp Reisner 	sector_t sector;
5844b411b363SPhilipp Reisner 	int size;
5845b411b363SPhilipp Reisner 
58469f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58479f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58482735a594SAndreas Gruenbacher 		return -EIO;
58499f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
58501952e916SAndreas Gruenbacher 
5851b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5852b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5853b411b363SPhilipp Reisner 
585469a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5855b411b363SPhilipp Reisner 
5856b411b363SPhilipp Reisner 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5857b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
5858b411b363SPhilipp Reisner 	else
5859b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
5860b411b363SPhilipp Reisner 
5861b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
58622735a594SAndreas Gruenbacher 		return 0;
58631d53f09eSLars Ellenberg 
5864b30ab791SAndreas Gruenbacher 	drbd_rs_complete_io(device, sector);
5865b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5866b411b363SPhilipp Reisner 
5867b30ab791SAndreas Gruenbacher 	--device->ov_left;
5868ea5442afSLars Ellenberg 
5869ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
5870b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
5871b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
5872ea5442afSLars Ellenberg 
5873b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0) {
587484b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(*dw), GFP_NOIO);
587584b8c06bSAndreas Gruenbacher 		if (dw) {
587684b8c06bSAndreas Gruenbacher 			dw->w.cb = w_ov_finished;
587784b8c06bSAndreas Gruenbacher 			dw->device = device;
587884b8c06bSAndreas Gruenbacher 			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5879b411b363SPhilipp Reisner 		} else {
588084b8c06bSAndreas Gruenbacher 			drbd_err(device, "kmalloc(dw) failed.");
5881b30ab791SAndreas Gruenbacher 			ov_out_of_sync_print(device);
5882b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
5883b411b363SPhilipp Reisner 		}
5884b411b363SPhilipp Reisner 	}
5885b30ab791SAndreas Gruenbacher 	put_ldev(device);
58862735a594SAndreas Gruenbacher 	return 0;
5887b411b363SPhilipp Reisner }
5888b411b363SPhilipp Reisner 
5889bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
58900ced55a3SPhilipp Reisner {
58912735a594SAndreas Gruenbacher 	return 0;
58920ced55a3SPhilipp Reisner }
58930ced55a3SPhilipp Reisner 
5894668700b4SPhilipp Reisner struct meta_sock_cmd {
5895b411b363SPhilipp Reisner 	size_t pkt_size;
5896bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5897b411b363SPhilipp Reisner };
5898b411b363SPhilipp Reisner 
5899668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5900668700b4SPhilipp Reisner {
5901668700b4SPhilipp Reisner 	long t;
5902668700b4SPhilipp Reisner 	struct net_conf *nc;
5903668700b4SPhilipp Reisner 
5904668700b4SPhilipp Reisner 	rcu_read_lock();
5905668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5906668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5907668700b4SPhilipp Reisner 	rcu_read_unlock();
5908668700b4SPhilipp Reisner 
5909668700b4SPhilipp Reisner 	t *= HZ;
5910668700b4SPhilipp Reisner 	if (ping_timeout)
5911668700b4SPhilipp Reisner 		t /= 10;
5912668700b4SPhilipp Reisner 
5913668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5914668700b4SPhilipp Reisner }
5915668700b4SPhilipp Reisner 
5916668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5917668700b4SPhilipp Reisner {
5918668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5919668700b4SPhilipp Reisner }
5920668700b4SPhilipp Reisner 
5921668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5922668700b4SPhilipp Reisner {
5923668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5924668700b4SPhilipp Reisner }
5925668700b4SPhilipp Reisner 
5926668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5927e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5928e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5929b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5930b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5931b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5932d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5933b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5934b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5935b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5936b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5937b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5938b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5939b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
594002918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5941d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
59421952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
59431952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
5944b411b363SPhilipp Reisner };
5945b411b363SPhilipp Reisner 
59461c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5947b411b363SPhilipp Reisner {
5948bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5949668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
595077351055SPhilipp Reisner 	struct packet_info pi;
5951668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
5952257d0af6SPhilipp Reisner 	int rv;
5953bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
5954b411b363SPhilipp Reisner 	int received = 0;
5955bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
595652b061a4SAndreas Gruenbacher 	int expect   = header_size;
595744ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
5958b411b363SPhilipp Reisner 
59598b700983SPeter Zijlstra 	sched_set_fifo_low(current);
5960b411b363SPhilipp Reisner 
5961e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
596280822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
596344ed167dSPhilipp Reisner 
5964668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
596544ed167dSPhilipp Reisner 
5966bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5967bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
59681ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
5969841ce241SAndreas Gruenbacher 				goto reconnect;
5970841ce241SAndreas Gruenbacher 			}
5971668700b4SPhilipp Reisner 			set_ping_timeout(connection);
597244ed167dSPhilipp Reisner 			ping_timeout_active = true;
5973b411b363SPhilipp Reisner 		}
5974b411b363SPhilipp Reisner 
5975668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
5976bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5977b411b363SPhilipp Reisner 
5978b411b363SPhilipp Reisner 		/* Note:
5979b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
5980b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
5981b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
5982b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
5983b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
5984b411b363SPhilipp Reisner 		 * rv == expected: full header or command
5985b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
5986b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
5987b411b363SPhilipp Reisner 		 */
5988b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
5989b411b363SPhilipp Reisner 			received += rv;
5990b411b363SPhilipp Reisner 			buf	 += rv;
5991b411b363SPhilipp Reisner 		} else if (rv == 0) {
5992bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
5993b66623e3SPhilipp Reisner 				long t;
5994b66623e3SPhilipp Reisner 				rcu_read_lock();
5995bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
5996b66623e3SPhilipp Reisner 				rcu_read_unlock();
5997b66623e3SPhilipp Reisner 
5998bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
5999bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6000b66623e3SPhilipp Reisner 						       t);
6001599377acSPhilipp Reisner 				if (t)
6002599377acSPhilipp Reisner 					break;
6003599377acSPhilipp Reisner 			}
60041ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6005b411b363SPhilipp Reisner 			goto reconnect;
6006b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6007cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6008cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6009668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6010cb6518cbSLars Ellenberg 				continue;
6011f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60121ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6013b411b363SPhilipp Reisner 				goto reconnect;
6014b411b363SPhilipp Reisner 			}
6015bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6016b411b363SPhilipp Reisner 			continue;
6017b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6018668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6019668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6020668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6021668700b4SPhilipp Reisner 			flush_signals(current);
6022b411b363SPhilipp Reisner 			continue;
6023b411b363SPhilipp Reisner 		} else {
60241ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6025b411b363SPhilipp Reisner 			goto reconnect;
6026b411b363SPhilipp Reisner 		}
6027b411b363SPhilipp Reisner 
6028b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6029bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6030b411b363SPhilipp Reisner 				goto reconnect;
6031668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6032668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60331ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60342fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6035b411b363SPhilipp Reisner 				goto disconnect;
6036b411b363SPhilipp Reisner 			}
6037e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
603852b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
60391ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
604077351055SPhilipp Reisner 					pi.cmd, pi.size);
6041b411b363SPhilipp Reisner 				goto reconnect;
6042b411b363SPhilipp Reisner 			}
6043257d0af6SPhilipp Reisner 		}
6044b411b363SPhilipp Reisner 		if (received == expect) {
60452735a594SAndreas Gruenbacher 			bool err;
6046a4fbda8eSPhilipp Reisner 
6047bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
60482735a594SAndreas Gruenbacher 			if (err) {
6049d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6050b411b363SPhilipp Reisner 				goto reconnect;
60511952e916SAndreas Gruenbacher 			}
6052b411b363SPhilipp Reisner 
6053bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6054f36af18cSLars Ellenberg 
6055668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6056668700b4SPhilipp Reisner 				set_idle_timeout(connection);
605744ed167dSPhilipp Reisner 				ping_timeout_active = false;
605844ed167dSPhilipp Reisner 			}
6059b411b363SPhilipp Reisner 
6060bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6061b411b363SPhilipp Reisner 			received = 0;
606252b061a4SAndreas Gruenbacher 			expect	 = header_size;
6063b411b363SPhilipp Reisner 			cmd	 = NULL;
6064b411b363SPhilipp Reisner 		}
6065b411b363SPhilipp Reisner 	}
6066b411b363SPhilipp Reisner 
6067b411b363SPhilipp Reisner 	if (0) {
6068b411b363SPhilipp Reisner reconnect:
6069bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6070bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6071b411b363SPhilipp Reisner 	}
6072b411b363SPhilipp Reisner 	if (0) {
6073b411b363SPhilipp Reisner disconnect:
6074bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6075b411b363SPhilipp Reisner 	}
6076b411b363SPhilipp Reisner 
6077668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6078b411b363SPhilipp Reisner 
6079b411b363SPhilipp Reisner 	return 0;
6080b411b363SPhilipp Reisner }
6081668700b4SPhilipp Reisner 
6082668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6083668700b4SPhilipp Reisner {
6084668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6085668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6086668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6087668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6088668700b4SPhilipp Reisner 	struct net_conf *nc;
6089668700b4SPhilipp Reisner 	int tcp_cork, err;
6090668700b4SPhilipp Reisner 
6091668700b4SPhilipp Reisner 	rcu_read_lock();
6092668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6093668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6094668700b4SPhilipp Reisner 	rcu_read_unlock();
6095668700b4SPhilipp Reisner 
6096668700b4SPhilipp Reisner 	if (tcp_cork)
6097db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, true);
6098668700b4SPhilipp Reisner 
6099668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6100668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6101668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6102668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6103668700b4SPhilipp Reisner 
6104668700b4SPhilipp Reisner 	if (err) {
6105668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6106668700b4SPhilipp Reisner 		return;
6107668700b4SPhilipp Reisner 	}
6108668700b4SPhilipp Reisner 
6109668700b4SPhilipp Reisner 	if (tcp_cork)
6110db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, false);
6111668700b4SPhilipp Reisner 
6112668700b4SPhilipp Reisner 	return;
6113668700b4SPhilipp Reisner }
6114