1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
37b411b363SPhilipp Reisner #include "drbd_int.h"
38a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner #include "drbd_vli.h"
41b411b363SPhilipp Reisner 
42f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4320c68fdeSLars Ellenberg 
4477351055SPhilipp Reisner struct packet_info {
4577351055SPhilipp Reisner 	enum drbd_packet cmd;
46e2857216SAndreas Gruenbacher 	unsigned int size;
47e2857216SAndreas Gruenbacher 	unsigned int vnr;
48e658983aSAndreas Gruenbacher 	void *data;
4977351055SPhilipp Reisner };
5077351055SPhilipp Reisner 
51b411b363SPhilipp Reisner enum finish_epoch {
52b411b363SPhilipp Reisner 	FE_STILL_LIVE,
53b411b363SPhilipp Reisner 	FE_DESTROYED,
54b411b363SPhilipp Reisner 	FE_RECYCLED,
55b411b363SPhilipp Reisner };
56b411b363SPhilipp Reisner 
57bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
58bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5969a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
60a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
61bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6299920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
66b411b363SPhilipp Reisner 
6745bb912bSLars Ellenberg /*
6845bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6945bb912bSLars Ellenberg  * page->private being our "next" pointer.
7045bb912bSLars Ellenberg  */
7145bb912bSLars Ellenberg 
7245bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7345bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
7445bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
7545bb912bSLars Ellenberg  */
7645bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
7745bb912bSLars Ellenberg {
7845bb912bSLars Ellenberg 	struct page *page;
7945bb912bSLars Ellenberg 	struct page *tmp;
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg 	BUG_ON(!n);
8245bb912bSLars Ellenberg 	BUG_ON(!head);
8345bb912bSLars Ellenberg 
8445bb912bSLars Ellenberg 	page = *head;
8523ce4227SPhilipp Reisner 
8623ce4227SPhilipp Reisner 	if (!page)
8723ce4227SPhilipp Reisner 		return NULL;
8823ce4227SPhilipp Reisner 
8945bb912bSLars Ellenberg 	while (page) {
9045bb912bSLars Ellenberg 		tmp = page_chain_next(page);
9145bb912bSLars Ellenberg 		if (--n == 0)
9245bb912bSLars Ellenberg 			break; /* found sufficient pages */
9345bb912bSLars Ellenberg 		if (tmp == NULL)
9445bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
9545bb912bSLars Ellenberg 			return NULL;
9645bb912bSLars Ellenberg 		page = tmp;
9745bb912bSLars Ellenberg 	}
9845bb912bSLars Ellenberg 
9945bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10045bb912bSLars Ellenberg 	set_page_private(page, 0);
10145bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
10245bb912bSLars Ellenberg 	page = *head;
10345bb912bSLars Ellenberg 	*head = tmp;
10445bb912bSLars Ellenberg 	return page;
10545bb912bSLars Ellenberg }
10645bb912bSLars Ellenberg 
10745bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10845bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10945bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11045bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11145bb912bSLars Ellenberg {
11245bb912bSLars Ellenberg 	struct page *tmp;
11345bb912bSLars Ellenberg 	int i = 1;
114e8628013SJoe Perches 	while ((tmp = page_chain_next(page))) {
115e8628013SJoe Perches 		++i;
116e8628013SJoe Perches 		page = tmp;
117e8628013SJoe Perches 	}
11845bb912bSLars Ellenberg 	if (len)
11945bb912bSLars Ellenberg 		*len = i;
12045bb912bSLars Ellenberg 	return page;
12145bb912bSLars Ellenberg }
12245bb912bSLars Ellenberg 
12345bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12445bb912bSLars Ellenberg {
12545bb912bSLars Ellenberg 	struct page *tmp;
12645bb912bSLars Ellenberg 	int i = 0;
12745bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12845bb912bSLars Ellenberg 		put_page(page);
12945bb912bSLars Ellenberg 		++i;
13045bb912bSLars Ellenberg 	}
13145bb912bSLars Ellenberg 	return i;
13245bb912bSLars Ellenberg }
13345bb912bSLars Ellenberg 
13445bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13545bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13645bb912bSLars Ellenberg {
13745bb912bSLars Ellenberg #if 1
13845bb912bSLars Ellenberg 	struct page *tmp;
13945bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14045bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14145bb912bSLars Ellenberg #endif
14245bb912bSLars Ellenberg 
14345bb912bSLars Ellenberg 	/* add chain to head */
14445bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14545bb912bSLars Ellenberg 	*head = chain_first;
14645bb912bSLars Ellenberg }
14745bb912bSLars Ellenberg 
148b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device,
14918c2d522SAndreas Gruenbacher 				       unsigned int number)
150b411b363SPhilipp Reisner {
151b411b363SPhilipp Reisner 	struct page *page = NULL;
15245bb912bSLars Ellenberg 	struct page *tmp = NULL;
15318c2d522SAndreas Gruenbacher 	unsigned int i = 0;
154b411b363SPhilipp Reisner 
155b411b363SPhilipp Reisner 	/* Yes, testing drbd_pp_vacant outside the lock is racy.
156b411b363SPhilipp Reisner 	 * So what. It saves a spin_lock. */
15745bb912bSLars Ellenberg 	if (drbd_pp_vacant >= number) {
158b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
15945bb912bSLars Ellenberg 		page = page_chain_del(&drbd_pp_pool, number);
16045bb912bSLars Ellenberg 		if (page)
16145bb912bSLars Ellenberg 			drbd_pp_vacant -= number;
162b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
16345bb912bSLars Ellenberg 		if (page)
16445bb912bSLars Ellenberg 			return page;
165b411b363SPhilipp Reisner 	}
16645bb912bSLars Ellenberg 
167b411b363SPhilipp Reisner 	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
168b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
169b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
17045bb912bSLars Ellenberg 	for (i = 0; i < number; i++) {
17145bb912bSLars Ellenberg 		tmp = alloc_page(GFP_TRY);
17245bb912bSLars Ellenberg 		if (!tmp)
17345bb912bSLars Ellenberg 			break;
17445bb912bSLars Ellenberg 		set_page_private(tmp, (unsigned long)page);
17545bb912bSLars Ellenberg 		page = tmp;
17645bb912bSLars Ellenberg 	}
17745bb912bSLars Ellenberg 
17845bb912bSLars Ellenberg 	if (i == number)
179b411b363SPhilipp Reisner 		return page;
18045bb912bSLars Ellenberg 
18145bb912bSLars Ellenberg 	/* Not enough pages immediately available this time.
182c37c8ecfSAndreas Gruenbacher 	 * No need to jump around here, drbd_alloc_pages will retry this
18345bb912bSLars Ellenberg 	 * function "soon". */
18445bb912bSLars Ellenberg 	if (page) {
18545bb912bSLars Ellenberg 		tmp = page_chain_tail(page, NULL);
18645bb912bSLars Ellenberg 		spin_lock(&drbd_pp_lock);
18745bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
18845bb912bSLars Ellenberg 		drbd_pp_vacant += i;
18945bb912bSLars Ellenberg 		spin_unlock(&drbd_pp_lock);
19045bb912bSLars Ellenberg 	}
19145bb912bSLars Ellenberg 	return NULL;
192b411b363SPhilipp Reisner }
193b411b363SPhilipp Reisner 
194b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
195a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
196b411b363SPhilipp Reisner {
197a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *tmp;
198b411b363SPhilipp Reisner 
199b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
200b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
201b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
202b411b363SPhilipp Reisner 	   stop to examine the list... */
203b411b363SPhilipp Reisner 
204a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
205045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
206b411b363SPhilipp Reisner 			break;
207a8cd15baSAndreas Gruenbacher 		list_move(&peer_req->w.list, to_be_freed);
208b411b363SPhilipp Reisner 	}
209b411b363SPhilipp Reisner }
210b411b363SPhilipp Reisner 
211668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
212b411b363SPhilipp Reisner {
213b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
214db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
215b411b363SPhilipp Reisner 
2160500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
217b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2180500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
219a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
220b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
221b411b363SPhilipp Reisner }
222b411b363SPhilipp Reisner 
223668700b4SPhilipp Reisner static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
224668700b4SPhilipp Reisner {
225668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device;
226668700b4SPhilipp Reisner 	int vnr;
227668700b4SPhilipp Reisner 
228668700b4SPhilipp Reisner 	rcu_read_lock();
229668700b4SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
230668700b4SPhilipp Reisner 		struct drbd_device *device = peer_device->device;
231668700b4SPhilipp Reisner 		if (!atomic_read(&device->pp_in_use_by_net))
232668700b4SPhilipp Reisner 			continue;
233668700b4SPhilipp Reisner 
234668700b4SPhilipp Reisner 		kref_get(&device->kref);
235668700b4SPhilipp Reisner 		rcu_read_unlock();
236668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
237668700b4SPhilipp Reisner 		kref_put(&device->kref, drbd_destroy_device);
238668700b4SPhilipp Reisner 		rcu_read_lock();
239668700b4SPhilipp Reisner 	}
240668700b4SPhilipp Reisner 	rcu_read_unlock();
241668700b4SPhilipp Reisner }
242668700b4SPhilipp Reisner 
243b411b363SPhilipp Reisner /**
244c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
2459b48ff07SLee Jones  * @peer_device:	DRBD device.
24645bb912bSLars Ellenberg  * @number:		number of pages requested
24745bb912bSLars Ellenberg  * @retry:		whether to retry, if not enough pages are available right now
248b411b363SPhilipp Reisner  *
24945bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2500e49d7b0SLars Ellenberg  * the kernel.
25145bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25245bb912bSLars Ellenberg  *
2530e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2540e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2550e49d7b0SLars Ellenberg  *
2560e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2570e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2580e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2590e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2600e49d7b0SLars Ellenberg  *
26145bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
262b411b363SPhilipp Reisner  */
26369a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
264c37c8ecfSAndreas Gruenbacher 			      bool retry)
265b411b363SPhilipp Reisner {
26669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
267b411b363SPhilipp Reisner 	struct page *page = NULL;
26844ed167dSPhilipp Reisner 	struct net_conf *nc;
269b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
2700e49d7b0SLars Ellenberg 	unsigned int mxb;
271b411b363SPhilipp Reisner 
27244ed167dSPhilipp Reisner 	rcu_read_lock();
27369a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
27444ed167dSPhilipp Reisner 	mxb = nc ? nc->max_buffers : 1000000;
27544ed167dSPhilipp Reisner 	rcu_read_unlock();
27644ed167dSPhilipp Reisner 
277b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->pp_in_use) < mxb)
278b30ab791SAndreas Gruenbacher 		page = __drbd_alloc_pages(device, number);
279b411b363SPhilipp Reisner 
280668700b4SPhilipp Reisner 	/* Try to keep the fast path fast, but occasionally we need
281668700b4SPhilipp Reisner 	 * to reclaim the pages we lended to the network stack. */
282668700b4SPhilipp Reisner 	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
283668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
284668700b4SPhilipp Reisner 
28545bb912bSLars Ellenberg 	while (page == NULL) {
286b411b363SPhilipp Reisner 		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
287b411b363SPhilipp Reisner 
288668700b4SPhilipp Reisner 		drbd_reclaim_net_peer_reqs(device);
289b411b363SPhilipp Reisner 
290b30ab791SAndreas Gruenbacher 		if (atomic_read(&device->pp_in_use) < mxb) {
291b30ab791SAndreas Gruenbacher 			page = __drbd_alloc_pages(device, number);
292b411b363SPhilipp Reisner 			if (page)
293b411b363SPhilipp Reisner 				break;
294b411b363SPhilipp Reisner 		}
295b411b363SPhilipp Reisner 
296b411b363SPhilipp Reisner 		if (!retry)
297b411b363SPhilipp Reisner 			break;
298b411b363SPhilipp Reisner 
299b411b363SPhilipp Reisner 		if (signal_pending(current)) {
300d0180171SAndreas Gruenbacher 			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
301b411b363SPhilipp Reisner 			break;
302b411b363SPhilipp Reisner 		}
303b411b363SPhilipp Reisner 
3040e49d7b0SLars Ellenberg 		if (schedule_timeout(HZ/10) == 0)
3050e49d7b0SLars Ellenberg 			mxb = UINT_MAX;
306b411b363SPhilipp Reisner 	}
307b411b363SPhilipp Reisner 	finish_wait(&drbd_pp_wait, &wait);
308b411b363SPhilipp Reisner 
30945bb912bSLars Ellenberg 	if (page)
310b30ab791SAndreas Gruenbacher 		atomic_add(number, &device->pp_in_use);
311b411b363SPhilipp Reisner 	return page;
312b411b363SPhilipp Reisner }
313b411b363SPhilipp Reisner 
314c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3150500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31645bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31745bb912bSLars Ellenberg  * or returns all pages to the system. */
318b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
319b411b363SPhilipp Reisner {
320b30ab791SAndreas Gruenbacher 	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
321b411b363SPhilipp Reisner 	int i;
322435f0740SLars Ellenberg 
323a73ff323SLars Ellenberg 	if (page == NULL)
324a73ff323SLars Ellenberg 		return;
325a73ff323SLars Ellenberg 
326183ece30SRoland Kammerer 	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32745bb912bSLars Ellenberg 		i = page_chain_free(page);
32845bb912bSLars Ellenberg 	else {
32945bb912bSLars Ellenberg 		struct page *tmp;
33045bb912bSLars Ellenberg 		tmp = page_chain_tail(page, &i);
331b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
33245bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
33345bb912bSLars Ellenberg 		drbd_pp_vacant += i;
334b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
335b411b363SPhilipp Reisner 	}
336435f0740SLars Ellenberg 	i = atomic_sub_return(i, a);
33745bb912bSLars Ellenberg 	if (i < 0)
338d0180171SAndreas Gruenbacher 		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
339435f0740SLars Ellenberg 			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
340b411b363SPhilipp Reisner 	wake_up(&drbd_pp_wait);
341b411b363SPhilipp Reisner }
342b411b363SPhilipp Reisner 
343b411b363SPhilipp Reisner /*
344b411b363SPhilipp Reisner You need to hold the req_lock:
345b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
346b411b363SPhilipp Reisner 
347b411b363SPhilipp Reisner You must not have the req_lock:
3483967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3490db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3507721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
351b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
352a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
353b411b363SPhilipp Reisner  drbd_clear_done_ee()
354b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
355b411b363SPhilipp Reisner */
356b411b363SPhilipp Reisner 
3579104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size)
3589104d31aSLars Ellenberg  * w_same: payload_size == logical_block_size
3599104d31aSLars Ellenberg  * trim: payload_size == 0 */
360f6ffca9fSAndreas Gruenbacher struct drbd_peer_request *
36169a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
3629104d31aSLars Ellenberg 		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
363b411b363SPhilipp Reisner {
36469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
365db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
366a73ff323SLars Ellenberg 	struct page *page = NULL;
3679104d31aSLars Ellenberg 	unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
368b411b363SPhilipp Reisner 
369b30ab791SAndreas Gruenbacher 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
370b411b363SPhilipp Reisner 		return NULL;
371b411b363SPhilipp Reisner 
3720892fac8SKent Overstreet 	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
373db830c46SAndreas Gruenbacher 	if (!peer_req) {
374b411b363SPhilipp Reisner 		if (!(gfp_mask & __GFP_NOWARN))
375d0180171SAndreas Gruenbacher 			drbd_err(device, "%s: allocation failed\n", __func__);
376b411b363SPhilipp Reisner 		return NULL;
377b411b363SPhilipp Reisner 	}
378b411b363SPhilipp Reisner 
3799104d31aSLars Ellenberg 	if (nr_pages) {
380d0164adcSMel Gorman 		page = drbd_alloc_pages(peer_device, nr_pages,
381d0164adcSMel Gorman 					gfpflags_allow_blocking(gfp_mask));
38245bb912bSLars Ellenberg 		if (!page)
38345bb912bSLars Ellenberg 			goto fail;
384a73ff323SLars Ellenberg 	}
385b411b363SPhilipp Reisner 
386c5a2c150SLars Ellenberg 	memset(peer_req, 0, sizeof(*peer_req));
387c5a2c150SLars Ellenberg 	INIT_LIST_HEAD(&peer_req->w.list);
388db830c46SAndreas Gruenbacher 	drbd_clear_interval(&peer_req->i);
3899104d31aSLars Ellenberg 	peer_req->i.size = request_size;
390db830c46SAndreas Gruenbacher 	peer_req->i.sector = sector;
391c5a2c150SLars Ellenberg 	peer_req->submit_jif = jiffies;
392a8cd15baSAndreas Gruenbacher 	peer_req->peer_device = peer_device;
393db830c46SAndreas Gruenbacher 	peer_req->pages = page;
3949a8e7753SAndreas Gruenbacher 	/*
3959a8e7753SAndreas Gruenbacher 	 * The block_id is opaque to the receiver.  It is not endianness
3969a8e7753SAndreas Gruenbacher 	 * converted, and sent back to the sender unchanged.
3979a8e7753SAndreas Gruenbacher 	 */
398db830c46SAndreas Gruenbacher 	peer_req->block_id = id;
399b411b363SPhilipp Reisner 
400db830c46SAndreas Gruenbacher 	return peer_req;
401b411b363SPhilipp Reisner 
40245bb912bSLars Ellenberg  fail:
4030892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
404b411b363SPhilipp Reisner 	return NULL;
405b411b363SPhilipp Reisner }
406b411b363SPhilipp Reisner 
407b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
408f6ffca9fSAndreas Gruenbacher 		       int is_net)
409b411b363SPhilipp Reisner {
41021ae5d7fSLars Ellenberg 	might_sleep();
411db830c46SAndreas Gruenbacher 	if (peer_req->flags & EE_HAS_DIGEST)
412db830c46SAndreas Gruenbacher 		kfree(peer_req->digest);
413b30ab791SAndreas Gruenbacher 	drbd_free_pages(device, peer_req->pages, is_net);
4140b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
4150b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
41621ae5d7fSLars Ellenberg 	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
41721ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
41821ae5d7fSLars Ellenberg 		drbd_al_complete_io(device, &peer_req->i);
41921ae5d7fSLars Ellenberg 	}
4200892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
421b411b363SPhilipp Reisner }
422b411b363SPhilipp Reisner 
423b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
424b411b363SPhilipp Reisner {
425b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
426db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
427b411b363SPhilipp Reisner 	int count = 0;
428b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
429b411b363SPhilipp Reisner 
4300500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
431b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4320500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
433b411b363SPhilipp Reisner 
434a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
435b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
436b411b363SPhilipp Reisner 		count++;
437b411b363SPhilipp Reisner 	}
438b411b363SPhilipp Reisner 	return count;
439b411b363SPhilipp Reisner }
440b411b363SPhilipp Reisner 
441b411b363SPhilipp Reisner /*
442a990be46SAndreas Gruenbacher  * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
443b411b363SPhilipp Reisner  */
444b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device)
445b411b363SPhilipp Reisner {
446b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
447b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
448db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
449e2b3032bSAndreas Gruenbacher 	int err = 0;
450b411b363SPhilipp Reisner 
4510500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
452b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
453b30ab791SAndreas Gruenbacher 	list_splice_init(&device->done_ee, &work_list);
4540500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
455b411b363SPhilipp Reisner 
456a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
457b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
458b411b363SPhilipp Reisner 
459b411b363SPhilipp Reisner 	/* possible callbacks here:
460d4dabbe2SLars Ellenberg 	 * e_end_block, and e_end_resync_block, e_send_superseded.
461b411b363SPhilipp Reisner 	 * all ignore the last argument.
462b411b363SPhilipp Reisner 	 */
463a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
464e2b3032bSAndreas Gruenbacher 		int err2;
465e2b3032bSAndreas Gruenbacher 
466b411b363SPhilipp Reisner 		/* list_del not necessary, next/prev members not touched */
467a8cd15baSAndreas Gruenbacher 		err2 = peer_req->w.cb(&peer_req->w, !!err);
468e2b3032bSAndreas Gruenbacher 		if (!err)
469e2b3032bSAndreas Gruenbacher 			err = err2;
470b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
471b411b363SPhilipp Reisner 	}
472b30ab791SAndreas Gruenbacher 	wake_up(&device->ee_wait);
473b411b363SPhilipp Reisner 
474e2b3032bSAndreas Gruenbacher 	return err;
475b411b363SPhilipp Reisner }
476b411b363SPhilipp Reisner 
477b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device,
478d4da1537SAndreas Gruenbacher 				     struct list_head *head)
479b411b363SPhilipp Reisner {
480b411b363SPhilipp Reisner 	DEFINE_WAIT(wait);
481b411b363SPhilipp Reisner 
482b411b363SPhilipp Reisner 	/* avoids spin_lock/unlock
483b411b363SPhilipp Reisner 	 * and calling prepare_to_wait in the fast path */
484b411b363SPhilipp Reisner 	while (!list_empty(head)) {
485b30ab791SAndreas Gruenbacher 		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
4860500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
4877eaceaccSJens Axboe 		io_schedule();
488b30ab791SAndreas Gruenbacher 		finish_wait(&device->ee_wait, &wait);
4890500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
490b411b363SPhilipp Reisner 	}
491b411b363SPhilipp Reisner }
492b411b363SPhilipp Reisner 
493b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device,
494d4da1537SAndreas Gruenbacher 				    struct list_head *head)
495b411b363SPhilipp Reisner {
4960500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
497b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, head);
4980500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
499b411b363SPhilipp Reisner }
500b411b363SPhilipp Reisner 
501dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
502b411b363SPhilipp Reisner {
503b411b363SPhilipp Reisner 	struct kvec iov = {
504b411b363SPhilipp Reisner 		.iov_base = buf,
505b411b363SPhilipp Reisner 		.iov_len = size,
506b411b363SPhilipp Reisner 	};
507b411b363SPhilipp Reisner 	struct msghdr msg = {
508b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
509b411b363SPhilipp Reisner 	};
510aa563d7bSDavid Howells 	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
511f7765c36SAl Viro 	return sock_recvmsg(sock, &msg, msg.msg_flags);
512b411b363SPhilipp Reisner }
513b411b363SPhilipp Reisner 
514bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
515b411b363SPhilipp Reisner {
516b411b363SPhilipp Reisner 	int rv;
517b411b363SPhilipp Reisner 
518bde89a9eSAndreas Gruenbacher 	rv = drbd_recv_short(connection->data.socket, buf, size, 0);
519b411b363SPhilipp Reisner 
520b411b363SPhilipp Reisner 	if (rv < 0) {
521b411b363SPhilipp Reisner 		if (rv == -ECONNRESET)
5221ec861ebSAndreas Gruenbacher 			drbd_info(connection, "sock was reset by peer\n");
523b411b363SPhilipp Reisner 		else if (rv != -ERESTARTSYS)
5241ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
525b411b363SPhilipp Reisner 	} else if (rv == 0) {
526bde89a9eSAndreas Gruenbacher 		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
527b66623e3SPhilipp Reisner 			long t;
528b66623e3SPhilipp Reisner 			rcu_read_lock();
529bde89a9eSAndreas Gruenbacher 			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
530b66623e3SPhilipp Reisner 			rcu_read_unlock();
531b66623e3SPhilipp Reisner 
532bde89a9eSAndreas Gruenbacher 			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
533b66623e3SPhilipp Reisner 
534599377acSPhilipp Reisner 			if (t)
535599377acSPhilipp Reisner 				goto out;
536599377acSPhilipp Reisner 		}
5371ec861ebSAndreas Gruenbacher 		drbd_info(connection, "sock was shut down by peer\n");
538599377acSPhilipp Reisner 	}
539599377acSPhilipp Reisner 
540b411b363SPhilipp Reisner 	if (rv != size)
541bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
542b411b363SPhilipp Reisner 
543599377acSPhilipp Reisner out:
544b411b363SPhilipp Reisner 	return rv;
545b411b363SPhilipp Reisner }
546b411b363SPhilipp Reisner 
547bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
548c6967746SAndreas Gruenbacher {
549c6967746SAndreas Gruenbacher 	int err;
550c6967746SAndreas Gruenbacher 
551bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
552c6967746SAndreas Gruenbacher 	if (err != size) {
553c6967746SAndreas Gruenbacher 		if (err >= 0)
554c6967746SAndreas Gruenbacher 			err = -EIO;
555c6967746SAndreas Gruenbacher 	} else
556c6967746SAndreas Gruenbacher 		err = 0;
557c6967746SAndreas Gruenbacher 	return err;
558c6967746SAndreas Gruenbacher }
559c6967746SAndreas Gruenbacher 
560bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
561a5c31904SAndreas Gruenbacher {
562a5c31904SAndreas Gruenbacher 	int err;
563a5c31904SAndreas Gruenbacher 
564bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
565a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5661ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
567a5c31904SAndreas Gruenbacher 	return err;
568a5c31904SAndreas Gruenbacher }
569a5c31904SAndreas Gruenbacher 
5705dbf1673SLars Ellenberg /* quoting tcp(7):
5715dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5725dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5735dbf1673SLars Ellenberg  * This is our wrapper to do so.
5745dbf1673SLars Ellenberg  */
5755dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5765dbf1673SLars Ellenberg 		unsigned int rcv)
5775dbf1673SLars Ellenberg {
5785dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5795dbf1673SLars Ellenberg 	if (snd) {
5805dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5815dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5825dbf1673SLars Ellenberg 	}
5835dbf1673SLars Ellenberg 	if (rcv) {
5845dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5855dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5865dbf1673SLars Ellenberg 	}
5875dbf1673SLars Ellenberg }
5885dbf1673SLars Ellenberg 
/*
 * drbd_try_connect() - attempt one outgoing connection to the peer
 *
 * Creates a TCP socket, binds it to the locally configured address
 * (port 0, so the kernel picks a free source port) and connects to the
 * peer address from net_conf.
 *
 * Returns the connected socket, or NULL on failure.  "Soft" failures
 * (timeout, refused, unreachable, pending signal) leave the connection
 * state alone so conn_connect() can retry; unexpected errors force
 * C_DISCONNECTING.
 */
589bde89a9eSAndreas Gruenbacher static struct socket *drbd_try_connect(struct drbd_connection *connection)
590b411b363SPhilipp Reisner {
591b411b363SPhilipp Reisner 	const char *what;
592b411b363SPhilipp Reisner 	struct socket *sock;
593b411b363SPhilipp Reisner 	struct sockaddr_in6 src_in6;
59444ed167dSPhilipp Reisner 	struct sockaddr_in6 peer_in6;
59544ed167dSPhilipp Reisner 	struct net_conf *nc;
59644ed167dSPhilipp Reisner 	int err, peer_addr_len, my_addr_len;
59769ef82deSAndreas Gruenbacher 	int sndbuf_size, rcvbuf_size, connect_int;
598b411b363SPhilipp Reisner 	int disconnect_on_error = 1;
599b411b363SPhilipp Reisner 
	/* Snapshot the tunables under RCU; net_conf may disappear while
	 * the connection is being torn down. */
60044ed167dSPhilipp Reisner 	rcu_read_lock();
601bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
60244ed167dSPhilipp Reisner 	if (!nc) {
60344ed167dSPhilipp Reisner 		rcu_read_unlock();
604b411b363SPhilipp Reisner 		return NULL;
60544ed167dSPhilipp Reisner 	}
60644ed167dSPhilipp Reisner 	sndbuf_size = nc->sndbuf_size;
60744ed167dSPhilipp Reisner 	rcvbuf_size = nc->rcvbuf_size;
60869ef82deSAndreas Gruenbacher 	connect_int = nc->connect_int;
609089c075dSAndreas Gruenbacher 	rcu_read_unlock();
61044ed167dSPhilipp Reisner 
611bde89a9eSAndreas Gruenbacher 	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
612bde89a9eSAndreas Gruenbacher 	memcpy(&src_in6, &connection->my_addr, my_addr_len);
61344ed167dSPhilipp Reisner 
614bde89a9eSAndreas Gruenbacher 	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
61544ed167dSPhilipp Reisner 		src_in6.sin6_port = 0;
61644ed167dSPhilipp Reisner 	else
61744ed167dSPhilipp Reisner 		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
61844ed167dSPhilipp Reisner 
	/* bound by sizeof(peer_in6): that is the buffer we copy into */
619bde89a9eSAndreas Gruenbacher 	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
620bde89a9eSAndreas Gruenbacher 	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
621b411b363SPhilipp Reisner 
622b411b363SPhilipp Reisner 	what = "sock_create_kern";
623eeb1bd5cSEric W. Biederman 	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
624b411b363SPhilipp Reisner 			       SOCK_STREAM, IPPROTO_TCP, &sock);
625b411b363SPhilipp Reisner 	if (err < 0) {
626b411b363SPhilipp Reisner 		sock = NULL;
627b411b363SPhilipp Reisner 		goto out;
628b411b363SPhilipp Reisner 	}
629b411b363SPhilipp Reisner 
630b411b363SPhilipp Reisner 	sock->sk->sk_rcvtimeo =
63169ef82deSAndreas Gruenbacher 	sock->sk->sk_sndtimeo = connect_int * HZ;
63244ed167dSPhilipp Reisner 	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
633b411b363SPhilipp Reisner 
634b411b363SPhilipp Reisner        /* explicitly bind to the configured IP as source IP
635b411b363SPhilipp Reisner 	*  for the outgoing connections.
636b411b363SPhilipp Reisner 	*  This is needed for multihomed hosts and to be
637b411b363SPhilipp Reisner 	*  able to use lo: interfaces for drbd.
638b411b363SPhilipp Reisner 	* Make sure to use 0 as port number, so linux selects
639b411b363SPhilipp Reisner 	*  a free one dynamically.
640b411b363SPhilipp Reisner 	*/
641b411b363SPhilipp Reisner 	what = "bind before connect";
64244ed167dSPhilipp Reisner 	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
643b411b363SPhilipp Reisner 	if (err < 0)
644b411b363SPhilipp Reisner 		goto out;
645b411b363SPhilipp Reisner 
646b411b363SPhilipp Reisner 	/* connect may fail, peer not yet available.
647b411b363SPhilipp Reisner 	 * stay C_WF_CONNECTION, don't go Disconnecting! */
648b411b363SPhilipp Reisner 	disconnect_on_error = 0;
649b411b363SPhilipp Reisner 	what = "connect";
65044ed167dSPhilipp Reisner 	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
651b411b363SPhilipp Reisner 
652b411b363SPhilipp Reisner out:
653b411b363SPhilipp Reisner 	if (err < 0) {
654b411b363SPhilipp Reisner 		if (sock) {
655b411b363SPhilipp Reisner 			sock_release(sock);
656b411b363SPhilipp Reisner 			sock = NULL;
657b411b363SPhilipp Reisner 		}
658b411b363SPhilipp Reisner 		switch (-err) {
659b411b363SPhilipp Reisner 			/* timeout, busy, signal pending */
660b411b363SPhilipp Reisner 		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
661b411b363SPhilipp Reisner 		case EINTR: case ERESTARTSYS:
662b411b363SPhilipp Reisner 			/* peer not (yet) available, network problem */
663b411b363SPhilipp Reisner 		case ECONNREFUSED: case ENETUNREACH:
664b411b363SPhilipp Reisner 		case EHOSTDOWN:    case EHOSTUNREACH:
665b411b363SPhilipp Reisner 			disconnect_on_error = 0;
666b411b363SPhilipp Reisner 			break;
667b411b363SPhilipp Reisner 		default:
6681ec861ebSAndreas Gruenbacher 			drbd_err(connection, "%s failed, err = %d\n", what, err);
669b411b363SPhilipp Reisner 		}
670b411b363SPhilipp Reisner 		if (disconnect_on_error)
671bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
672b411b363SPhilipp Reisner 	}
67344ed167dSPhilipp Reisner 
674b411b363SPhilipp Reisner 	return sock;
675b411b363SPhilipp Reisner }
676b411b363SPhilipp Reisner 
/* Context attached to the listen socket (via sk_user_data) so that an
 * incoming connection can wake up drbd_wait_for_connect(). */
6777a426fd8SPhilipp Reisner struct accept_wait_data {
	/* connection this listener belongs to */
678bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection;
	/* the listening socket itself */
6797a426fd8SPhilipp Reisner 	struct socket *s_listen;
	/* completed when an incoming connection reaches TCP_ESTABLISHED */
6807a426fd8SPhilipp Reisner 	struct completion door_bell;
	/* saved callback, restored by unregister_state_change() */
6817a426fd8SPhilipp Reisner 	void (*original_sk_state_change)(struct sock *sk);
6827a426fd8SPhilipp Reisner 
6837a426fd8SPhilipp Reisner };
6847a426fd8SPhilipp Reisner 
/* sk_state_change callback installed on the listen socket: ring the
 * door bell once a connection reaches TCP_ESTABLISHED, then chain to
 * the socket's original state-change callback. */
685715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk)
686b411b363SPhilipp Reisner {
6877a426fd8SPhilipp Reisner 	struct accept_wait_data *ad = sk->sk_user_data;
688715306f6SAndreas Gruenbacher 	void (*state_change)(struct sock *sk);
6897a426fd8SPhilipp Reisner 
690715306f6SAndreas Gruenbacher 	state_change = ad->original_sk_state_change;
691715306f6SAndreas Gruenbacher 	if (sk->sk_state == TCP_ESTABLISHED)
6927a426fd8SPhilipp Reisner 		complete(&ad->door_bell);
693715306f6SAndreas Gruenbacher 	state_change(sk);
6947a426fd8SPhilipp Reisner }
6957a426fd8SPhilipp Reisner 
/*
 * prepare_listen_socket() - create, bind and listen on our local address
 *
 * On success, stores the listening socket in @ad and redirects its
 * sk_state_change callback to drbd_incoming_connection() so an incoming
 * peer connection completes ad->door_bell.
 *
 * Returns 0 on success, -EIO on failure (unexpected errors additionally
 * force the connection to C_DISCONNECTING).
 */
696bde89a9eSAndreas Gruenbacher static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
697b411b363SPhilipp Reisner {
6981f3e509bSPhilipp Reisner 	int err, sndbuf_size, rcvbuf_size, my_addr_len;
69944ed167dSPhilipp Reisner 	struct sockaddr_in6 my_addr;
7001f3e509bSPhilipp Reisner 	struct socket *s_listen;
70144ed167dSPhilipp Reisner 	struct net_conf *nc;
702b411b363SPhilipp Reisner 	const char *what;
703b411b363SPhilipp Reisner 
	/* snapshot buffer sizes under RCU; net_conf may be gone already */
70444ed167dSPhilipp Reisner 	rcu_read_lock();
705bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
70644ed167dSPhilipp Reisner 	if (!nc) {
70744ed167dSPhilipp Reisner 		rcu_read_unlock();
7087a426fd8SPhilipp Reisner 		return -EIO;
70944ed167dSPhilipp Reisner 	}
71044ed167dSPhilipp Reisner 	sndbuf_size = nc->sndbuf_size;
71144ed167dSPhilipp Reisner 	rcvbuf_size = nc->rcvbuf_size;
71244ed167dSPhilipp Reisner 	rcu_read_unlock();
713b411b363SPhilipp Reisner 
714bde89a9eSAndreas Gruenbacher 	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
715bde89a9eSAndreas Gruenbacher 	memcpy(&my_addr, &connection->my_addr, my_addr_len);
716b411b363SPhilipp Reisner 
717b411b363SPhilipp Reisner 	what = "sock_create_kern";
718eeb1bd5cSEric W. Biederman 	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
719b411b363SPhilipp Reisner 			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
720b411b363SPhilipp Reisner 	if (err) {
721b411b363SPhilipp Reisner 		s_listen = NULL;
722b411b363SPhilipp Reisner 		goto out;
723b411b363SPhilipp Reisner 	}
724b411b363SPhilipp Reisner 
7254a17fd52SPavel Emelyanov 	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
72644ed167dSPhilipp Reisner 	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
727b411b363SPhilipp Reisner 
728b411b363SPhilipp Reisner 	what = "bind before listen";
72944ed167dSPhilipp Reisner 	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
730b411b363SPhilipp Reisner 	if (err < 0)
731b411b363SPhilipp Reisner 		goto out;
732b411b363SPhilipp Reisner 
	/* install our state-change hook under the callback lock */
7337a426fd8SPhilipp Reisner 	ad->s_listen = s_listen;
7347a426fd8SPhilipp Reisner 	write_lock_bh(&s_listen->sk->sk_callback_lock);
7357a426fd8SPhilipp Reisner 	ad->original_sk_state_change = s_listen->sk->sk_state_change;
736715306f6SAndreas Gruenbacher 	s_listen->sk->sk_state_change = drbd_incoming_connection;
7377a426fd8SPhilipp Reisner 	s_listen->sk->sk_user_data = ad;
7387a426fd8SPhilipp Reisner 	write_unlock_bh(&s_listen->sk->sk_callback_lock);
739b411b363SPhilipp Reisner 
7402820fd39SPhilipp Reisner 	what = "listen";
7412820fd39SPhilipp Reisner 	err = s_listen->ops->listen(s_listen, 5);
7422820fd39SPhilipp Reisner 	if (err < 0)
7432820fd39SPhilipp Reisner 		goto out;
7442820fd39SPhilipp Reisner 
7457a426fd8SPhilipp Reisner 	return 0;
746b411b363SPhilipp Reisner out:
747b411b363SPhilipp Reisner 	if (s_listen)
748b411b363SPhilipp Reisner 		sock_release(s_listen);
749b411b363SPhilipp Reisner 	if (err < 0) {
750b411b363SPhilipp Reisner 		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7511ec861ebSAndreas Gruenbacher 			drbd_err(connection, "%s failed, err = %d\n", what, err);
752bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
753b411b363SPhilipp Reisner 		}
754b411b363SPhilipp Reisner 	}
7551f3e509bSPhilipp Reisner 
7567a426fd8SPhilipp Reisner 	return -EIO;
7571f3e509bSPhilipp Reisner }
7581f3e509bSPhilipp Reisner 
/* Restore the socket's original sk_state_change callback and clear the
 * sk_user_data reference installed by prepare_listen_socket(). */
759715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
760715306f6SAndreas Gruenbacher {
761715306f6SAndreas Gruenbacher 	write_lock_bh(&sk->sk_callback_lock);
762715306f6SAndreas Gruenbacher 	sk->sk_state_change = ad->original_sk_state_change;
763715306f6SAndreas Gruenbacher 	sk->sk_user_data = NULL;
764715306f6SAndreas Gruenbacher 	write_unlock_bh(&sk->sk_callback_lock);
765715306f6SAndreas Gruenbacher }
766715306f6SAndreas Gruenbacher 
/*
 * drbd_wait_for_connect() - wait for an incoming connection from the peer
 *
 * Sleeps interruptibly for up to connect_int seconds (+/- 28.5% random
 * jitter, so both nodes don't retry in lock step) on the listen
 * socket's door bell, then accepts the pending connection.
 *
 * Returns the accepted socket, or NULL on timeout, signal, missing
 * net_conf, or accept failure.
 */
767bde89a9eSAndreas Gruenbacher static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
7681f3e509bSPhilipp Reisner {
7691f3e509bSPhilipp Reisner 	int timeo, connect_int, err = 0;
7701f3e509bSPhilipp Reisner 	struct socket *s_estab = NULL;
7711f3e509bSPhilipp Reisner 	struct net_conf *nc;
7721f3e509bSPhilipp Reisner 
7731f3e509bSPhilipp Reisner 	rcu_read_lock();
774bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
7751f3e509bSPhilipp Reisner 	if (!nc) {
7761f3e509bSPhilipp Reisner 		rcu_read_unlock();
7771f3e509bSPhilipp Reisner 		return NULL;
7781f3e509bSPhilipp Reisner 	}
7791f3e509bSPhilipp Reisner 	connect_int = nc->connect_int;
7801f3e509bSPhilipp Reisner 	rcu_read_unlock();
7811f3e509bSPhilipp Reisner 
7821f3e509bSPhilipp Reisner 	timeo = connect_int * HZ;
78338b682b2SAkinobu Mita 	/* 28.5% random jitter */
78438b682b2SAkinobu Mita 	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
7851f3e509bSPhilipp Reisner 
7867a426fd8SPhilipp Reisner 	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	/* <= 0: timed out or interrupted -- nothing to accept */
7877a426fd8SPhilipp Reisner 	if (err <= 0)
7887a426fd8SPhilipp Reisner 		return NULL;
7891f3e509bSPhilipp Reisner 
7907a426fd8SPhilipp Reisner 	err = kernel_accept(ad->s_listen, &s_estab, 0);
791b411b363SPhilipp Reisner 	if (err < 0) {
792b411b363SPhilipp Reisner 		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7931ec861ebSAndreas Gruenbacher 			drbd_err(connection, "accept failed, err = %d\n", err);
794bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
795b411b363SPhilipp Reisner 		}
796b411b363SPhilipp Reisner 	}
797b411b363SPhilipp Reisner 
	/* the accepted socket does not need our listen-socket callback */
798715306f6SAndreas Gruenbacher 	if (s_estab)
799715306f6SAndreas Gruenbacher 		unregister_state_change(s_estab->sk, ad);
800b411b363SPhilipp Reisner 
801b411b363SPhilipp Reisner 	return s_estab;
802b411b363SPhilipp Reisner }
803b411b363SPhilipp Reisner 
804bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
805b411b363SPhilipp Reisner 
/* Send the initial, payload-free packet that identifies a freshly
 * connected socket as the data (P_INITIAL_DATA) or meta
 * (P_INITIAL_META) channel.  Returns 0 on success, -EIO otherwise. */
806bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8079f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8089f5bdc33SAndreas Gruenbacher {
809bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8109f5bdc33SAndreas Gruenbacher 		return -EIO;
811bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
/*
 * receive_first_packet() - read the peer's first packet on a new socket
 *
 * Reads one header and decodes it.  Returns the packet command
 * (expected: P_INITIAL_DATA or P_INITIAL_META) on success, or a
 * negative error code on short read / decode failure / missing
 * net_conf.
 */
814bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
815b411b363SPhilipp Reisner {
816bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
8179f5bdc33SAndreas Gruenbacher 	struct packet_info pi;
8184920e37aSPhilipp Reisner 	struct net_conf *nc;
8199f5bdc33SAndreas Gruenbacher 	int err;
820b411b363SPhilipp Reisner 
8214920e37aSPhilipp Reisner 	rcu_read_lock();
8224920e37aSPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8234920e37aSPhilipp Reisner 	if (!nc) {
8244920e37aSPhilipp Reisner 		rcu_read_unlock();
8254920e37aSPhilipp Reisner 		return -EIO;
8264920e37aSPhilipp Reisner 	}
	/* don't wait forever for the first packet: 4x ping timeout
	 * (ping_timeo is configured in tenths of a second, presumably --
	 * hence the /10 when converting to jiffies) */
8274920e37aSPhilipp Reisner 	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
8284920e37aSPhilipp Reisner 	rcu_read_unlock();
8294920e37aSPhilipp Reisner 
830bde89a9eSAndreas Gruenbacher 	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
8319f5bdc33SAndreas Gruenbacher 	if (err != header_size) {
8329f5bdc33SAndreas Gruenbacher 		if (err >= 0)
8339f5bdc33SAndreas Gruenbacher 			err = -EIO;
8349f5bdc33SAndreas Gruenbacher 		return err;
8359f5bdc33SAndreas Gruenbacher 	}
836bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, connection->data.rbuf, &pi);
8379f5bdc33SAndreas Gruenbacher 	if (err)
8389f5bdc33SAndreas Gruenbacher 		return err;
8399f5bdc33SAndreas Gruenbacher 	return pi.cmd;
840b411b363SPhilipp Reisner }
841b411b363SPhilipp Reisner 
842b411b363SPhilipp Reisner /**
843b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
844b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
 *
 * Does a non-blocking MSG_PEEK to probe the socket.
 *
 * Return: true if the socket is usable (data pending or peek would
 * block); false if *@sock was NULL or the socket has failed, in which
 * case it is released and *@sock is set to NULL.
845b411b363SPhilipp Reisner  */
8465d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
847b411b363SPhilipp Reisner {
848b411b363SPhilipp Reisner 	int rr;
849b411b363SPhilipp Reisner 	char tb[4];
850b411b363SPhilipp Reisner 
851b411b363SPhilipp Reisner 	if (!*sock)
85281e84650SAndreas Gruenbacher 		return false;
853b411b363SPhilipp Reisner 
854dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
855b411b363SPhilipp Reisner 
856b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85781e84650SAndreas Gruenbacher 		return true;
858b411b363SPhilipp Reisner 	} else {
859b411b363SPhilipp Reisner 		sock_release(*sock);
860b411b363SPhilipp Reisner 		*sock = NULL;
86181e84650SAndreas Gruenbacher 		return false;
862b411b363SPhilipp Reisner 	}
863b411b363SPhilipp Reisner }
8645d0b17f1SPhilipp Reisner 
/*
 * connection_established() - check that both new sockets are healthy
 *
 * Sleeps sock_check_timeo (falling back to ping_timeo) tenths of a
 * second to give a crossed connection attempt from the peer time to
 * arrive, then probes both sockets with drbd_socket_okay() (which frees
 * a dead socket and NULLs the pointer).
 *
 * Returns true only if both sockets exist and are usable.
 */
8655d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8665d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8675d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8685d0b17f1SPhilipp Reisner {
8695d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8705d0b17f1SPhilipp Reisner 	int timeout;
8715d0b17f1SPhilipp Reisner 	bool ok;
8725d0b17f1SPhilipp Reisner 
8735d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8745d0b17f1SPhilipp Reisner 		return false;
8755d0b17f1SPhilipp Reisner 
8765d0b17f1SPhilipp Reisner 	rcu_read_lock();
8775d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
	/* net_conf may already be gone while the connection is being torn
	 * down; treat that as "not established" instead of dereferencing
	 * NULL.  The sibling helpers (drbd_try_connect(),
	 * prepare_listen_socket(), drbd_wait_for_connect()) guard against
	 * the same race. */
	if (!nc) {
		rcu_read_unlock();
		return false;
	}
8785d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8795d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8805d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8815d0b17f1SPhilipp Reisner 
8825d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8835d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8845d0b17f1SPhilipp Reisner 
8855d0b17f1SPhilipp Reisner 	return ok;
8865d0b17f1SPhilipp Reisner }
8875d0b17f1SPhilipp Reisner 
8882325eb66SPhilipp Reisner /* Gets called if a connection is established, or if a new minor gets created
8892325eb66SPhilipp Reisner    in a connection */
/* Resets the per-peer sequence counters, sends sync parameters, sizes,
 * uuids and the current state to the peer, and (re)arms the request
 * timer.  Returns 0 on success or the first error from the sends. */
89069a22773SAndreas Gruenbacher int drbd_connected(struct drbd_peer_device *peer_device)
891907599e0SPhilipp Reisner {
89269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
8930829f5edSAndreas Gruenbacher 	int err;
894907599e0SPhilipp Reisner 
895b30ab791SAndreas Gruenbacher 	atomic_set(&device->packet_seq, 0);
896b30ab791SAndreas Gruenbacher 	device->peer_seq = 0;
897907599e0SPhilipp Reisner 
	/* peers older than protocol 100 share one connection-wide state
	 * mutex; newer ones get a per-device mutex */
89869a22773SAndreas Gruenbacher 	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
89969a22773SAndreas Gruenbacher 		&peer_device->connection->cstate_mutex :
900b30ab791SAndreas Gruenbacher 		&device->own_state_mutex;
9018410da8fSPhilipp Reisner 
90269a22773SAndreas Gruenbacher 	err = drbd_send_sync_param(peer_device);
9030829f5edSAndreas Gruenbacher 	if (!err)
90469a22773SAndreas Gruenbacher 		err = drbd_send_sizes(peer_device, 0, 0);
9050829f5edSAndreas Gruenbacher 	if (!err)
90669a22773SAndreas Gruenbacher 		err = drbd_send_uuids(peer_device);
9070829f5edSAndreas Gruenbacher 	if (!err)
90869a22773SAndreas Gruenbacher 		err = drbd_send_current_state(peer_device);
909b30ab791SAndreas Gruenbacher 	clear_bit(USE_DEGR_WFC_T, &device->flags);
910b30ab791SAndreas Gruenbacher 	clear_bit(RESIZE_PENDING, &device->flags);
911b30ab791SAndreas Gruenbacher 	atomic_set(&device->ap_in_flight, 0);
912b30ab791SAndreas Gruenbacher 	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
9130829f5edSAndreas Gruenbacher 	return err;
914907599e0SPhilipp Reisner }
915b411b363SPhilipp Reisner 
916b411b363SPhilipp Reisner /*
917b411b363SPhilipp Reisner  * return values:
918b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
919b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
920b411b363SPhilipp Reisner  *  -1 peer talks different language,
921b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
922b411b363SPhilipp Reisner  *  -2 We do not have a network config...
923b411b363SPhilipp Reisner  */
924bde89a9eSAndreas Gruenbacher static int conn_connect(struct drbd_connection *connection)
925b411b363SPhilipp Reisner {
9267da35862SPhilipp Reisner 	struct drbd_socket sock, msock;
927c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
92844ed167dSPhilipp Reisner 	struct net_conf *nc;
9295d0b17f1SPhilipp Reisner 	int vnr, timeout, h;
9305d0b17f1SPhilipp Reisner 	bool discard_my_data, ok;
931197296ffSPhilipp Reisner 	enum drbd_state_rv rv;
9327a426fd8SPhilipp Reisner 	struct accept_wait_data ad = {
933bde89a9eSAndreas Gruenbacher 		.connection = connection,
9347a426fd8SPhilipp Reisner 		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
9357a426fd8SPhilipp Reisner 	};
936b411b363SPhilipp Reisner 
937bde89a9eSAndreas Gruenbacher 	clear_bit(DISCONNECT_SENT, &connection->flags);
938bde89a9eSAndreas Gruenbacher 	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
939b411b363SPhilipp Reisner 		return -2;
940b411b363SPhilipp Reisner 
9417da35862SPhilipp Reisner 	mutex_init(&sock.mutex);
942bde89a9eSAndreas Gruenbacher 	sock.sbuf = connection->data.sbuf;
943bde89a9eSAndreas Gruenbacher 	sock.rbuf = connection->data.rbuf;
9447da35862SPhilipp Reisner 	sock.socket = NULL;
9457da35862SPhilipp Reisner 	mutex_init(&msock.mutex);
946bde89a9eSAndreas Gruenbacher 	msock.sbuf = connection->meta.sbuf;
947bde89a9eSAndreas Gruenbacher 	msock.rbuf = connection->meta.rbuf;
9487da35862SPhilipp Reisner 	msock.socket = NULL;
9497da35862SPhilipp Reisner 
9500916e0e3SAndreas Gruenbacher 	/* Assume that the peer only understands protocol 80 until we know better.  */
951bde89a9eSAndreas Gruenbacher 	connection->agreed_pro_version = 80;
952b411b363SPhilipp Reisner 
953bde89a9eSAndreas Gruenbacher 	if (prepare_listen_socket(connection, &ad))
9547a426fd8SPhilipp Reisner 		return 0;
955b411b363SPhilipp Reisner 
	/* Establish both TCP connections: keep actively connecting out and,
	 * in parallel, accepting what the peer initiates, until we hold
	 * both a data and a meta socket.  Crossed attempts are resolved by
	 * a coin flip (see "randomize" below). */
956b411b363SPhilipp Reisner 	do {
9572bf89621SAndreas Gruenbacher 		struct socket *s;
958b411b363SPhilipp Reisner 
959bde89a9eSAndreas Gruenbacher 		s = drbd_try_connect(connection);
960b411b363SPhilipp Reisner 		if (s) {
9617da35862SPhilipp Reisner 			if (!sock.socket) {
9627da35862SPhilipp Reisner 				sock.socket = s;
963bde89a9eSAndreas Gruenbacher 				send_first_packet(connection, &sock, P_INITIAL_DATA);
9647da35862SPhilipp Reisner 			} else if (!msock.socket) {
965bde89a9eSAndreas Gruenbacher 				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
9667da35862SPhilipp Reisner 				msock.socket = s;
967bde89a9eSAndreas Gruenbacher 				send_first_packet(connection, &msock, P_INITIAL_META);
968b411b363SPhilipp Reisner 			} else {
9691ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Logic error in conn_connect()\n");
970b411b363SPhilipp Reisner 				goto out_release_sockets;
971b411b363SPhilipp Reisner 			}
972b411b363SPhilipp Reisner 		}
973b411b363SPhilipp Reisner 
9745d0b17f1SPhilipp Reisner 		if (connection_established(connection, &sock.socket, &msock.socket))
975b411b363SPhilipp Reisner 			break;
976b411b363SPhilipp Reisner 
977b411b363SPhilipp Reisner retry:
978bde89a9eSAndreas Gruenbacher 		s = drbd_wait_for_connect(connection, &ad);
979b411b363SPhilipp Reisner 		if (s) {
980bde89a9eSAndreas Gruenbacher 			int fp = receive_first_packet(connection, s);
9817da35862SPhilipp Reisner 			drbd_socket_okay(&sock.socket);
9827da35862SPhilipp Reisner 			drbd_socket_okay(&msock.socket);
98392f14951SPhilipp Reisner 			switch (fp) {
984e5d6f33aSAndreas Gruenbacher 			case P_INITIAL_DATA:
9857da35862SPhilipp Reisner 				if (sock.socket) {
9861ec861ebSAndreas Gruenbacher 					drbd_warn(connection, "initial packet S crossed\n");
9877da35862SPhilipp Reisner 					sock_release(sock.socket);
98880c6eed4SPhilipp Reisner 					sock.socket = s;
98980c6eed4SPhilipp Reisner 					goto randomize;
990b411b363SPhilipp Reisner 				}
9917da35862SPhilipp Reisner 				sock.socket = s;
992b411b363SPhilipp Reisner 				break;
993e5d6f33aSAndreas Gruenbacher 			case P_INITIAL_META:
994bde89a9eSAndreas Gruenbacher 				set_bit(RESOLVE_CONFLICTS, &connection->flags);
9957da35862SPhilipp Reisner 				if (msock.socket) {
9961ec861ebSAndreas Gruenbacher 					drbd_warn(connection, "initial packet M crossed\n");
9977da35862SPhilipp Reisner 					sock_release(msock.socket);
99880c6eed4SPhilipp Reisner 					msock.socket = s;
99980c6eed4SPhilipp Reisner 					goto randomize;
1000b411b363SPhilipp Reisner 				}
10017da35862SPhilipp Reisner 				msock.socket = s;
1002b411b363SPhilipp Reisner 				break;
1003b411b363SPhilipp Reisner 			default:
10041ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Error receiving initial packet\n");
1005b411b363SPhilipp Reisner 				sock_release(s);
100680c6eed4SPhilipp Reisner randomize:
100738b682b2SAkinobu Mita 				if (prandom_u32() & 1)
1008b411b363SPhilipp Reisner 					goto retry;
1009b411b363SPhilipp Reisner 			}
1010b411b363SPhilipp Reisner 		}
1011b411b363SPhilipp Reisner 
1012bde89a9eSAndreas Gruenbacher 		if (connection->cstate <= C_DISCONNECTING)
1013b411b363SPhilipp Reisner 			goto out_release_sockets;
1014b411b363SPhilipp Reisner 		if (signal_pending(current)) {
1015b411b363SPhilipp Reisner 			flush_signals(current);
1016b411b363SPhilipp Reisner 			smp_rmb();
1017bde89a9eSAndreas Gruenbacher 			if (get_t_state(&connection->receiver) == EXITING)
1018b411b363SPhilipp Reisner 				goto out_release_sockets;
1019b411b363SPhilipp Reisner 		}
1020b411b363SPhilipp Reisner 
10215d0b17f1SPhilipp Reisner 		ok = connection_established(connection, &sock.socket, &msock.socket);
1022b666dbf8SPhilipp Reisner 	} while (!ok);
1023b411b363SPhilipp Reisner 
	/* both channels are up; the listener is no longer needed */
10247a426fd8SPhilipp Reisner 	if (ad.s_listen)
10257a426fd8SPhilipp Reisner 		sock_release(ad.s_listen);
1026b411b363SPhilipp Reisner 
102798683650SPhilipp Reisner 	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
102898683650SPhilipp Reisner 	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
1029b411b363SPhilipp Reisner 
10307da35862SPhilipp Reisner 	sock.socket->sk->sk_allocation = GFP_NOIO;
10317da35862SPhilipp Reisner 	msock.socket->sk->sk_allocation = GFP_NOIO;
1032b411b363SPhilipp Reisner 
10337da35862SPhilipp Reisner 	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
10347da35862SPhilipp Reisner 	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
1035b411b363SPhilipp Reisner 
1036b411b363SPhilipp Reisner 	/* NOT YET ...
1037bde89a9eSAndreas Gruenbacher 	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
10387da35862SPhilipp Reisner 	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
10396038178eSAndreas Gruenbacher 	 * first set it to the P_CONNECTION_FEATURES timeout,
1040b411b363SPhilipp Reisner 	 * which we set to 4x the configured ping_timeout. */
104144ed167dSPhilipp Reisner 	rcu_read_lock();
1042bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
1043b411b363SPhilipp Reisner 
10447da35862SPhilipp Reisner 	sock.socket->sk->sk_sndtimeo =
10457da35862SPhilipp Reisner 	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
104644ed167dSPhilipp Reisner 
10477da35862SPhilipp Reisner 	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
104844ed167dSPhilipp Reisner 	timeout = nc->timeout * HZ / 10;
104908b165baSPhilipp Reisner 	discard_my_data = nc->discard_my_data;
105044ed167dSPhilipp Reisner 	rcu_read_unlock();
105144ed167dSPhilipp Reisner 
10527da35862SPhilipp Reisner 	msock.socket->sk->sk_sndtimeo = timeout;
1053b411b363SPhilipp Reisner 
1054b411b363SPhilipp Reisner 	/* we don't want delays.
105525985edcSLucas De Marchi 	 * we use TCP_CORK where appropriate, though */
105612abc5eeSChristoph Hellwig 	tcp_sock_set_nodelay(sock.socket->sk);
105712abc5eeSChristoph Hellwig 	tcp_sock_set_nodelay(msock.socket->sk);
1058b411b363SPhilipp Reisner 
1059bde89a9eSAndreas Gruenbacher 	connection->data.socket = sock.socket;
1060bde89a9eSAndreas Gruenbacher 	connection->meta.socket = msock.socket;
1061bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1062b411b363SPhilipp Reisner 
	/* negotiate the protocol version, then (optionally) authenticate */
1063bde89a9eSAndreas Gruenbacher 	h = drbd_do_features(connection);
1064b411b363SPhilipp Reisner 	if (h <= 0)
1065b411b363SPhilipp Reisner 		return h;
1066b411b363SPhilipp Reisner 
1067bde89a9eSAndreas Gruenbacher 	if (connection->cram_hmac_tfm) {
1068b30ab791SAndreas Gruenbacher 		/* drbd_request_state(device, NS(conn, WFAuth)); */
1069bde89a9eSAndreas Gruenbacher 		switch (drbd_do_auth(connection)) {
1070b10d96cbSJohannes Thoma 		case -1:
10711ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Authentication of peer failed\n");
1072b411b363SPhilipp Reisner 			return -1;
1073b10d96cbSJohannes Thoma 		case 0:
10741ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Authentication of peer failed, trying again.\n");
1075b10d96cbSJohannes Thoma 			return 0;
1076b411b363SPhilipp Reisner 		}
1077b411b363SPhilipp Reisner 	}
1078b411b363SPhilipp Reisner 
1079bde89a9eSAndreas Gruenbacher 	connection->data.socket->sk->sk_sndtimeo = timeout;
1080bde89a9eSAndreas Gruenbacher 	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1081b411b363SPhilipp Reisner 
1082bde89a9eSAndreas Gruenbacher 	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
10837e2455c1SPhilipp Reisner 		return -1;
10841e86ac48SPhilipp Reisner 
108513c76abaSPhilipp Reisner 	/* Prevent a race between resync-handshake and
108613c76abaSPhilipp Reisner 	 * being promoted to Primary.
108713c76abaSPhilipp Reisner 	 *
108813c76abaSPhilipp Reisner 	 * Grab and release the state mutex, so we know that any current
108913c76abaSPhilipp Reisner 	 * drbd_set_role() is finished, and any incoming drbd_set_role
109013c76abaSPhilipp Reisner 	 * will see the STATE_SENT flag, and wait for it to be cleared.
109113c76abaSPhilipp Reisner 	 */
109231007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
109331007745SPhilipp Reisner 		mutex_lock(peer_device->device->state_mutex);
109431007745SPhilipp Reisner 
1095cde81d99SLars Ellenberg 	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
1096cde81d99SLars Ellenberg 	spin_lock_irq(&connection->resource->req_lock);
109731007745SPhilipp Reisner 	set_bit(STATE_SENT, &connection->flags);
1098cde81d99SLars Ellenberg 	spin_unlock_irq(&connection->resource->req_lock);
109931007745SPhilipp Reisner 
110031007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
110131007745SPhilipp Reisner 		mutex_unlock(peer_device->device->state_mutex);
110231007745SPhilipp Reisner 
	/* run the per-volume handshake for every configured minor */
110331007745SPhilipp Reisner 	rcu_read_lock();
110431007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
110531007745SPhilipp Reisner 		struct drbd_device *device = peer_device->device;
110631007745SPhilipp Reisner 		kref_get(&device->kref);
110731007745SPhilipp Reisner 		rcu_read_unlock();
110813c76abaSPhilipp Reisner 
110908b165baSPhilipp Reisner 		if (discard_my_data)
1110b30ab791SAndreas Gruenbacher 			set_bit(DISCARD_MY_DATA, &device->flags);
111108b165baSPhilipp Reisner 		else
1112b30ab791SAndreas Gruenbacher 			clear_bit(DISCARD_MY_DATA, &device->flags);
111308b165baSPhilipp Reisner 
111469a22773SAndreas Gruenbacher 		drbd_connected(peer_device);
111505a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
1116c141ebdaSPhilipp Reisner 		rcu_read_lock();
1117c141ebdaSPhilipp Reisner 	}
1118c141ebdaSPhilipp Reisner 	rcu_read_unlock();
1119c141ebdaSPhilipp Reisner 
1120bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1121bde89a9eSAndreas Gruenbacher 	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1122bde89a9eSAndreas Gruenbacher 		clear_bit(STATE_SENT, &connection->flags);
11231e86ac48SPhilipp Reisner 		return 0;
1124a1096a6eSPhilipp Reisner 	}
11251e86ac48SPhilipp Reisner 
11261c03e520SPhilipp Reisner 	drbd_thread_start(&connection->ack_receiver);
112739e91a60SLars Ellenberg 	/* opencoded create_singlethread_workqueue(),
112839e91a60SLars Ellenberg 	 * to be able to use format string arguments */
112939e91a60SLars Ellenberg 	connection->ack_sender =
113039e91a60SLars Ellenberg 		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
1131668700b4SPhilipp Reisner 	if (!connection->ack_sender) {
1132668700b4SPhilipp Reisner 		drbd_err(connection, "Failed to create workqueue ack_sender\n");
1133668700b4SPhilipp Reisner 		return 0;
1134668700b4SPhilipp Reisner 	}
1135b411b363SPhilipp Reisner 
11360500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
113708b165baSPhilipp Reisner 	/* The discard_my_data flag is a single-shot modifier to the next
113808b165baSPhilipp Reisner 	 * connection attempt, the handshake of which is now well underway.
113908b165baSPhilipp Reisner 	 * No need for rcu style copying of the whole struct
114008b165baSPhilipp Reisner 	 * just to clear a single value. */
1141bde89a9eSAndreas Gruenbacher 	connection->net_conf->discard_my_data = 0;
11420500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
114308b165baSPhilipp Reisner 
1144d3fcb490SPhilipp Reisner 	return h;
1145b411b363SPhilipp Reisner 
1146b411b363SPhilipp Reisner out_release_sockets:
11477a426fd8SPhilipp Reisner 	if (ad.s_listen)
11487a426fd8SPhilipp Reisner 		sock_release(ad.s_listen);
11497da35862SPhilipp Reisner 	if (sock.socket)
11507da35862SPhilipp Reisner 		sock_release(sock.socket);
11517da35862SPhilipp Reisner 	if (msock.socket)
11527da35862SPhilipp Reisner 		sock_release(msock.socket);
1153b411b363SPhilipp Reisner 	return -1;
1154b411b363SPhilipp Reisner }
1155b411b363SPhilipp Reisner 
/* Parse one on-the-wire packet header into @pi.
 *
 * The expected header layout depends on the negotiated protocol version;
 * we key off the header size for this connection and verify the magic
 * value.  On success, pi->vnr, pi->cmd and pi->size are filled in and
 * pi->data points just past the header inside @header.
 *
 * Returns 0 on success, -EINVAL on a malformed or unexpected header.
 */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		/* p_header100 format: carries a volume number and a pad
		 * field that must be zero */
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		/* p_header95 format: 32 bit length, no volume number */
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		/* legacy p_header80 format: only a 16 bit length */
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}
1191b411b363SPhilipp Reisner 
/* Submit any bios the receiver has plugged so far, then start a fresh plug.
 * Only acts if the current task's active plug really is our receiver_plug;
 * blk_finish_plug() flushes the queued bios to the backing devices. */
static void drbd_unplug_all_devices(struct drbd_connection *connection)
{
	if (current->plug == &connection->receiver_plug) {
		blk_finish_plug(&connection->receiver_plug);
		blk_start_plug(&connection->receiver_plug);
	} /* else: maybe just schedule() ?? */
}
1199c51a0ef3SLars Ellenberg 
1200bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1201257d0af6SPhilipp Reisner {
1202bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120369bc7bc3SAndreas Gruenbacher 	int err;
1204257d0af6SPhilipp Reisner 
1205bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1206a5c31904SAndreas Gruenbacher 	if (err)
120769bc7bc3SAndreas Gruenbacher 		return err;
1208257d0af6SPhilipp Reisner 
1209bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1210bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1211b411b363SPhilipp Reisner 
121269bc7bc3SAndreas Gruenbacher 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first attempt a non-blocking receive.
 * If the whole header is not already in the receive buffer, use the idle
 * moment to unplug the backing devices and ask remote TCP for quick acks,
 * then fall back to a blocking receive of the remaining bytes. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		if (err > 0) {
			/* partial header received: account for it, then
			 * block for the rest */
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;	/* flushes in flight, plus one bias held by the submitter */
	int error;		/* last error seen; 0 if all flushes succeeded */
	struct completion done;	/* completed when pending drops to zero */
};
/* per-bio context: links one flush bio back to its device and to the
 * shared issue_flush_context */
struct one_flush_context {
	struct drbd_device *device;
	struct issue_flush_context *ctx;
};
1258f9ff0da5SLars Ellenberg 
/* bio completion for one asynchronous flush submitted by submit_one_flush().
 * Records any error in the shared context, drops the references taken at
 * submit time, and completes the context once the last flush finishes. */
static void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_status) {
		ctx->error = blk_status_to_errno(bio->bi_status);
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);	/* matches get_ldev() in drbd_flush() */
	kref_put(&device->kref, drbd_destroy_device);

	/* drop this flush's count; the submitter holds an extra bias */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1279f9ff0da5SLars Ellenberg 
/* Submit one asynchronous REQ_PREFLUSH bio to @device's backing device,
 * accounted in @ctx.  On allocation failure, record -ENOMEM in @ctx and
 * drop the ldev and kref references the caller handed us; otherwise those
 * references are released by one_flush_endio() on completion. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 0);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
	if (!bio || !octx) {
		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		kfree(octx);
		if (bio)
			bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio_set_dev(bio, device->ldev->backing_bdev);
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;
	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	atomic_inc(&ctx->pending);	/* dropped again in one_flush_endio() */
	submit_bio(bio);
}
1311f9ff0da5SLars Ellenberg 
/* Flush the backing devices of all attached volumes of @connection in
 * parallel, and wait for all flushes to complete.  Only does anything for
 * write ordering >= WO_BDEV_FLUSH; on any flush error, falls back to
 * WO_DRAIN_IO for this resource. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* pending starts at 1: that extra count is our own bias,
		 * dropped below once all flushes have been submitted */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* leave the RCU read section before submitting (the
			 * GFP_NOIO allocations may sleep); the kref keeps
			 * the device alive meanwhile */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1352b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * Walks the epoch list starting at @epoch.  An epoch can be finished once
 * it contains at least one request (epoch_size != 0), has no active
 * requests left, and either has its barrier number or we are cleaning up
 * (EV_CLEANUP).  Finishing one epoch may allow its successor to finish as
 * well, hence the loop.
 *
 * Return: FE_STILL_LIVE if nothing was finished, FE_DESTROYED if at least
 * one epoch object was freed, FE_RECYCLED if the current epoch was reset
 * for reuse.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier flag */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending the barrier ack */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* not the newest epoch: unlink and free it,
				 * then reevaluate its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is only reset for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1428b411b363SPhilipp Reisner 
14298fe39aacSPhilipp Reisner static enum write_ordering_e
14308fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14318fe39aacSPhilipp Reisner {
14328fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14338fe39aacSPhilipp Reisner 
14348fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14358fe39aacSPhilipp Reisner 
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1438f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1439f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14408fe39aacSPhilipp Reisner 
14418fe39aacSPhilipp Reisner 	return wo;
14428fe39aacSPhilipp Reisner }
14438fe39aacSPhilipp Reisner 
/*
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource whose devices are consulted.
 * @bdev:	additional backing device to honor even when it is not found
 *		among @resource's attached devices; may be NULL.
 * @wo:		Write ordering method to try.
 *
 * The effective method is capped by what every backing device allows (see
 * max_allowed_wo()).  Except for an explicit WO_BDEV_FLUSH request, the
 * method can only move "down" from the currently configured one.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* if @bdev belongs to one of our devices it is
			 * covered by this loop; don't apply it twice below */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1482b411b363SPhilipp Reisner 
/*
 * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
 * will directly go to fallback mode, submitting normal writes, and
 * never even try to UNMAP.
 *
 * And dm-thin does not do this (yet), mostly because in general it has
 * to assume that "skip_block_zeroing" is set.  See also:
 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
 *
 * We *may* ignore the discard-zeroes-data setting, if so configured.
 *
 * Assumption is that this "discard_zeroes_data=0" is only because the backend
 * may ignore partial unaligned discards.
 *
 * LVM/DM thin as of at least
 *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
 *   Library version: 1.02.93-RHEL7 (2015-01-28)
 *   Driver version:  4.29.0
 * still behaves this way.
 *
 * For unaligned (wrt. alignment and granularity) or too small discards,
 * we zero-out the initial (and/or) trailing unaligned partial chunks,
 * but discard all the aligned full chunks.
 *
 * At least for LVM/DM thin, with skip_block_zeroing=false,
 * the result is effectively "discard_zeroes_data=1".
 */
/* flags: EE_TRIM|EE_ZEROOUT */
/* Returns 0 if all sub-requests succeeded, 1 if any of them failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* explicit zero-out requested, or discard not requested at all */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap at 2 GiB per discard, rounded down to a granularity multiple */
	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		/* start is misaligned: zero out the leading partial chunk
		 * up to the next aligned boundary */
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* discard the aligned full chunks, max_discard_sectors at a time */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* whatever could not be discarded (or all of it) gets zeroed out */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1577f31e583aSLars Ellenberg 
1578f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1579f31e583aSLars Ellenberg {
1580f31e583aSLars Ellenberg 	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1581f31e583aSLars Ellenberg 	struct disk_conf *dc;
1582f31e583aSLars Ellenberg 	bool can_do;
1583f31e583aSLars Ellenberg 
1584f31e583aSLars Ellenberg 	if (!blk_queue_discard(q))
1585f31e583aSLars Ellenberg 		return false;
1586f31e583aSLars Ellenberg 
1587f31e583aSLars Ellenberg 	rcu_read_lock();
1588f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1589f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1590f31e583aSLars Ellenberg 	rcu_read_unlock();
1591f31e583aSLars Ellenberg 	return can_do;
1592f31e583aSLars Ellenberg }
1593f31e583aSLars Ellenberg 
/* Synchronously discard (or zero out) the range of @peer_req on our
 * backing device, then complete the peer request, flagging any error. */
static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	/* If the backend cannot discard, or does not guarantee
	 * read-back zeroes in discarded ranges, we fall back to
	 * zero-out.  Unless configuration specifically requested
	 * otherwise. */
	if (!can_do_reliable_discards(device))
		peer_req->flags |= EE_ZEROOUT;

	/* i.size is in bytes, the helper takes 512-byte sectors */
	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}
1608dd4f699dSLars Ellenberg 
/* Synchronously issue the peer's WRITE SAME request to our backing
 * device, then complete the peer request, flagging any error. */
static void drbd_issue_peer_wsame(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t s = peer_req->i.sector;
	sector_t nr = peer_req->i.size >> 9;	/* bytes -> 512-byte sectors */
	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}
16199104d31aSLars Ellenberg 
16209104d31aSLars Ellenberg 
16216ec2a0f2SLee Jones /*
1622fbe29decSAndreas Gruenbacher  * drbd_submit_peer_request()
1623b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1624db830c46SAndreas Gruenbacher  * @peer_req:	peer request
162510f6d992SLars Ellenberg  *
162610f6d992SLars Ellenberg  * May spread the pages to multiple bios,
162710f6d992SLars Ellenberg  * depending on bio_add_page restrictions.
162810f6d992SLars Ellenberg  *
162910f6d992SLars Ellenberg  * Returns 0 if all bios have been submitted,
163010f6d992SLars Ellenberg  * -ENOMEM if we could not allocate enough bios,
163110f6d992SLars Ellenberg  * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
163210f6d992SLars Ellenberg  *  single page to an empty bio (which should never happen and likely indicates
163310f6d992SLars Ellenberg  *  that the lower level IO stack is in some way broken). This has been observed
163410f6d992SLars Ellenberg  *  on certain Xen deployments.
163545bb912bSLars Ellenberg  */
163645bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
1637b30ab791SAndreas Gruenbacher int drbd_submit_peer_request(struct drbd_device *device,
1638fbe29decSAndreas Gruenbacher 			     struct drbd_peer_request *peer_req,
1639bb3cc85eSMike Christie 			     const unsigned op, const unsigned op_flags,
1640bb3cc85eSMike Christie 			     const int fault_type)
164145bb912bSLars Ellenberg {
164245bb912bSLars Ellenberg 	struct bio *bios = NULL;
164345bb912bSLars Ellenberg 	struct bio *bio;
1644db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
1645db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
164611f8b2b6SAndreas Gruenbacher 	unsigned data_size = peer_req->i.size;
164745bb912bSLars Ellenberg 	unsigned n_bios = 0;
164811f8b2b6SAndreas Gruenbacher 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
164910f6d992SLars Ellenberg 	int err = -ENOMEM;
165045bb912bSLars Ellenberg 
1651dd4f699dSLars Ellenberg 	/* TRIM/DISCARD: for now, always use the helper function
1652dd4f699dSLars Ellenberg 	 * blkdev_issue_zeroout(..., discard=true).
1653dd4f699dSLars Ellenberg 	 * It's synchronous, but it does the right thing wrt. bio splitting.
1654dd4f699dSLars Ellenberg 	 * Correctness first, performance later.  Next step is to code an
1655dd4f699dSLars Ellenberg 	 * asynchronous variant of the same.
1656dd4f699dSLars Ellenberg 	 */
1657f31e583aSLars Ellenberg 	if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
1658a0fb3c47SLars Ellenberg 		/* wait for all pending IO completions, before we start
1659a0fb3c47SLars Ellenberg 		 * zeroing things out. */
16605dd2ca19SAndreas Gruenbacher 		conn_wait_active_ee_empty(peer_req->peer_device->connection);
166145d2933cSLars Ellenberg 		/* add it to the active list now,
166245d2933cSLars Ellenberg 		 * so we can find it to present it in debugfs */
166321ae5d7fSLars Ellenberg 		peer_req->submit_jif = jiffies;
166421ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SUBMITTED;
1665700ca8c0SPhilipp Reisner 
1666700ca8c0SPhilipp Reisner 		/* If this was a resync request from receive_rs_deallocated(),
1667700ca8c0SPhilipp Reisner 		 * it is already on the sync_ee list */
1668700ca8c0SPhilipp Reisner 		if (list_empty(&peer_req->w.list)) {
166945d2933cSLars Ellenberg 			spin_lock_irq(&device->resource->req_lock);
167045d2933cSLars Ellenberg 			list_add_tail(&peer_req->w.list, &device->active_ee);
167145d2933cSLars Ellenberg 			spin_unlock_irq(&device->resource->req_lock);
1672700ca8c0SPhilipp Reisner 		}
1673700ca8c0SPhilipp Reisner 
1674f31e583aSLars Ellenberg 		if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
1675f31e583aSLars Ellenberg 			drbd_issue_peer_discard_or_zero_out(device, peer_req);
16769104d31aSLars Ellenberg 		else /* EE_WRITE_SAME */
16779104d31aSLars Ellenberg 			drbd_issue_peer_wsame(device, peer_req);
1678a0fb3c47SLars Ellenberg 		return 0;
1679a0fb3c47SLars Ellenberg 	}
1680a0fb3c47SLars Ellenberg 
168145bb912bSLars Ellenberg 	/* In most cases, we will only need one bio.  But in case the lower
168245bb912bSLars Ellenberg 	 * level restrictions happen to be different at this offset on this
168345bb912bSLars Ellenberg 	 * side than those of the sending peer, we may need to submit the
16849476f39dSLars Ellenberg 	 * request in more than one bio.
16859476f39dSLars Ellenberg 	 *
16869476f39dSLars Ellenberg 	 * Plain bio_alloc is good enough here, this is no DRBD internally
16879476f39dSLars Ellenberg 	 * generated bio, but a bio allocated on behalf of the peer.
16889476f39dSLars Ellenberg 	 */
168945bb912bSLars Ellenberg next_bio:
169045bb912bSLars Ellenberg 	bio = bio_alloc(GFP_NOIO, nr_pages);
169145bb912bSLars Ellenberg 	if (!bio) {
1692a0fb3c47SLars Ellenberg 		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
169345bb912bSLars Ellenberg 		goto fail;
169445bb912bSLars Ellenberg 	}
1695db830c46SAndreas Gruenbacher 	/* > peer_req->i.sector, unless this is the first bio */
16964f024f37SKent Overstreet 	bio->bi_iter.bi_sector = sector;
169774d46992SChristoph Hellwig 	bio_set_dev(bio, device->ldev->backing_bdev);
1698bb3cc85eSMike Christie 	bio_set_op_attrs(bio, op, op_flags);
1699db830c46SAndreas Gruenbacher 	bio->bi_private = peer_req;
1700fcefa62eSAndreas Gruenbacher 	bio->bi_end_io = drbd_peer_request_endio;
170145bb912bSLars Ellenberg 
170245bb912bSLars Ellenberg 	bio->bi_next = bios;
170345bb912bSLars Ellenberg 	bios = bio;
170445bb912bSLars Ellenberg 	++n_bios;
170545bb912bSLars Ellenberg 
170645bb912bSLars Ellenberg 	page_chain_for_each(page) {
170711f8b2b6SAndreas Gruenbacher 		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
170806efffdaSMing Lei 		if (!bio_add_page(bio, page, len, 0))
170945bb912bSLars Ellenberg 			goto next_bio;
171011f8b2b6SAndreas Gruenbacher 		data_size -= len;
171145bb912bSLars Ellenberg 		sector += len >> 9;
171245bb912bSLars Ellenberg 		--nr_pages;
171345bb912bSLars Ellenberg 	}
171411f8b2b6SAndreas Gruenbacher 	D_ASSERT(device, data_size == 0);
1715a0fb3c47SLars Ellenberg 	D_ASSERT(device, page == NULL);
171645bb912bSLars Ellenberg 
1717db830c46SAndreas Gruenbacher 	atomic_set(&peer_req->pending_bios, n_bios);
171821ae5d7fSLars Ellenberg 	/* for debugfs: update timestamp, mark as submitted */
171921ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
172021ae5d7fSLars Ellenberg 	peer_req->flags |= EE_SUBMITTED;
172145bb912bSLars Ellenberg 	do {
172245bb912bSLars Ellenberg 		bio = bios;
172345bb912bSLars Ellenberg 		bios = bios->bi_next;
172445bb912bSLars Ellenberg 		bio->bi_next = NULL;
172545bb912bSLars Ellenberg 
1726ed00aabdSChristoph Hellwig 		drbd_submit_bio_noacct(device, fault_type, bio);
172745bb912bSLars Ellenberg 	} while (bios);
172845bb912bSLars Ellenberg 	return 0;
172945bb912bSLars Ellenberg 
173045bb912bSLars Ellenberg fail:
173145bb912bSLars Ellenberg 	while (bios) {
173245bb912bSLars Ellenberg 		bio = bios;
173345bb912bSLars Ellenberg 		bios = bios->bi_next;
173445bb912bSLars Ellenberg 		bio_put(bio);
173545bb912bSLars Ellenberg 	}
173610f6d992SLars Ellenberg 	return err;
173745bb912bSLars Ellenberg }
173845bb912bSLars Ellenberg 
1739b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1740db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
174153840641SAndreas Gruenbacher {
1742db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
174353840641SAndreas Gruenbacher 
1744b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
174553840641SAndreas Gruenbacher 	drbd_clear_interval(i);
174653840641SAndreas Gruenbacher 
17476c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
174853840641SAndreas Gruenbacher 	if (i->waiting)
1749b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
175053840641SAndreas Gruenbacher }
175153840641SAndreas Gruenbacher 
/* Wait, for every volume of this connection, until its active_ee list
 * (peer write requests submitted to the local disk) has drained empty.
 * Iterates the peer_devices idr under RCU; because drbd_wait_ee_list_empty()
 * sleeps, the RCU read-side critical section is dropped around it and a
 * kref is held so the device cannot be destroyed meanwhile. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* pin the device before sleeping outside the RCU section */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
176977fede51SPhilipp Reisner 
/* Handle a P_BARRIER packet from the peer: close the current write epoch
 * and, depending on the resource's write ordering mode, allocate a fresh
 * epoch to collect subsequent writes.  Returns 0 on success, -EIO on an
 * unexpected write_ordering value. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		fallthrough;

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* drain and flush all pending writes before reusing/creating
		 * an epoch, so the barrier semantics hold on stable storage */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* a new epoch was allocated above; initialize and link it in */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1837b411b363SPhilipp Reisner 
18389104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
18393d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18409104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18419104d31aSLars Ellenberg 			      unsigned int payload_size)
18429104d31aSLars Ellenberg {
18439104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18449104d31aSLars Ellenberg 	r->i.size = payload_size;
18459104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18469104d31aSLars Ellenberg 	r->i.size = tmp;
18479104d31aSLars Ellenberg }
18489104d31aSLars Ellenberg 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * data_size: actual payload ("data in")
 * 	for normal writes that is bi_size.
 * 	for discards, that is zero.
 * 	for write same, it is logical_block_size.
 * both trim and write same have the bi_size ("data len to be affected")
 * as extra argument in the packet header.
 *
 * Allocates a peer request, receives (and optionally integrity-checks)
 * the payload from the socket into its page chain, and returns it.
 * Returns NULL on any protocol, validation, allocation or receive error.
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = get_capacity(device->vdisk);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;

	/* if integrity checking is enabled, the digest precedes the payload */
	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim and wsame. */
	ds = data_size;
	if (trim) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (zeroes) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(zeroes->size);
	} else if (wsame) {
		/* write-same payload must be exactly one logical block,
		 * both from DRBD's and the backend's point of view */
		if (data_size != queue_logical_block_size(device->rq_queue)) {
			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
				data_size, queue_logical_block_size(device->rq_queue));
			return NULL;
		}
		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
			return NULL;
		}
		ds = be32_to_cpu(wsame->size);
	}

	/* sanity-check the affected size against protocol limits */
	if (!expect(IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || wsame || zeroes) {
		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	/* trim and zero-out carry no payload; we are done after flagging */
	if (trim) {
		peer_req->flags |= EE_TRIM;
		return peer_req;
	}
	if (zeroes) {
		peer_req->flags |= EE_ZEROOUT;
		return peer_req;
	}
	if (wsame)
		peer_req->flags |= EE_WRITE_SAME;

	/* receive payload size bytes into page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the digest sent by the peer */
	if (digest_size) {
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1980b411b363SPhilipp Reisner 
1981b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1982b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1983b411b363SPhilipp Reisner  */
198469a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1985b411b363SPhilipp Reisner {
1986b411b363SPhilipp Reisner 	struct page *page;
1987a5c31904SAndreas Gruenbacher 	int err = 0;
1988b411b363SPhilipp Reisner 	void *data;
1989b411b363SPhilipp Reisner 
1990c3470cdeSLars Ellenberg 	if (!data_size)
1991fc5be839SAndreas Gruenbacher 		return 0;
1992c3470cdeSLars Ellenberg 
199369a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1994b411b363SPhilipp Reisner 
1995b411b363SPhilipp Reisner 	data = kmap(page);
1996b411b363SPhilipp Reisner 	while (data_size) {
1997fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1998fc5be839SAndreas Gruenbacher 
199969a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
2000a5c31904SAndreas Gruenbacher 		if (err)
2001b411b363SPhilipp Reisner 			break;
2002a5c31904SAndreas Gruenbacher 		data_size -= len;
2003b411b363SPhilipp Reisner 	}
2004b411b363SPhilipp Reisner 	kunmap(page);
200569a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
2006fc5be839SAndreas Gruenbacher 	return err;
2007b411b363SPhilipp Reisner }
2008b411b363SPhilipp Reisner 
/* Receive the payload of a disk-less read reply directly into the pages of
 * the original request's master bio, optionally verifying the peer's
 * integrity digest first.  Returns 0 on success, a negative error code on
 * receive failure or digest mismatch. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* if integrity checking is enabled, the digest precedes the payload */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill the bio segment by segment straight from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* the bio must have been exactly large enough for the payload */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
2056b411b363SPhilipp Reisner 
2057a990be46SAndreas Gruenbacher /*
2058668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
2059a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
2060a990be46SAndreas Gruenbacher  */
206199920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
2062b411b363SPhilipp Reisner {
20638050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2064a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2065a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2066a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2067db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
206899920dc5SAndreas Gruenbacher 	int err;
2069b411b363SPhilipp Reisner 
20700b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2071b411b363SPhilipp Reisner 
2072db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2073b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, peer_req->i.size);
2074a8cd15baSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
2075b411b363SPhilipp Reisner 	} else {
2076b411b363SPhilipp Reisner 		/* Record failure to sync */
2077b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, peer_req->i.size);
2078b411b363SPhilipp Reisner 
2079a8cd15baSAndreas Gruenbacher 		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2080b411b363SPhilipp Reisner 	}
2081b30ab791SAndreas Gruenbacher 	dec_unacked(device);
2082b411b363SPhilipp Reisner 
208399920dc5SAndreas Gruenbacher 	return err;
2084b411b363SPhilipp Reisner }
2085b411b363SPhilipp Reisner 
/* Read a resync data block from the socket and submit it to the local disk.
 * On success (0) the peer request owns the ldev reference, which is dropped
 * by the request's completion path; on failure (-EIO) the reference is
 * released here. */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	/* queue on sync_ee before submitting, so completion can find it */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* undo the list insertion done above before freeing the request */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
2125b411b363SPhilipp Reisner 
2126668eebc6SAndreas Gruenbacher static struct drbd_request *
2127b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2128bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2129b411b363SPhilipp Reisner {
2130b411b363SPhilipp Reisner 	struct drbd_request *req;
2131668eebc6SAndreas Gruenbacher 
2132bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2133bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
21345e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2135668eebc6SAndreas Gruenbacher 		return req;
2136c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2137d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2138c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2139c3afd8f5SAndreas Gruenbacher 	}
2140668eebc6SAndreas Gruenbacher 	return NULL;
2141668eebc6SAndreas Gruenbacher }
2142668eebc6SAndreas Gruenbacher 
/* Handle a P_DATA_REPLY packet: locate the pending read request that the
 * peer is answering and copy the payload into its master bio.  Returns 0 on
 * success, -EIO on lookup or receive failure. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* look up the request under the lock; block_id is the echoed pointer */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
2177b411b363SPhilipp Reisner 
/* Handle a P_RS_DATA_REPLY packet: write the received resync block to the
 * local disk if attached, otherwise drain the payload from the socket and
 * negatively acknowledge it. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies always carry the syncer id, never a request pointer */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		/* still must consume the payload to keep the stream in sync */
		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account the incoming resync traffic for resync-rate control */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
2212b411b363SPhilipp Reisner 
2213b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device,
22147be8da07SAndreas Gruenbacher 				       sector_t sector, int size)
2215b411b363SPhilipp Reisner {
22167be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
22177be8da07SAndreas Gruenbacher 	struct drbd_request *req;
2218b411b363SPhilipp Reisner 
2219b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
22207be8da07SAndreas Gruenbacher 		if (!i->local)
22217be8da07SAndreas Gruenbacher 			continue;
22227be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
22237be8da07SAndreas Gruenbacher 		if (req->rq_state & RQ_LOCAL_PENDING ||
22247be8da07SAndreas Gruenbacher 		    !(req->rq_state & RQ_POSTPONED))
22257be8da07SAndreas Gruenbacher 			continue;
22262312f0b3SLars Ellenberg 		/* as it is RQ_POSTPONED, this will cause it to
22272312f0b3SLars Ellenberg 		 * be queued on the retry workqueue. */
2228d4dabbe2SLars Ellenberg 		__req_mod(req, CONFLICT_RESOLVED, NULL);
22297be8da07SAndreas Gruenbacher 	}
22307be8da07SAndreas Gruenbacher }
22317be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a mirrored write (peer request): send the
 * appropriate ack packet, remove the request from the conflict-detection
 * interval tree, and drop the epoch reference.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While in a resync state (SyncSource..PausedSyncT),
			 * a write that may set the block in sync is acked
			 * with P_RS_WRITE_ACK instead of plain P_WRITE_ACK. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			/* local write failed: negative ack */
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		/* pairs with inc_unacked() in receive_Data() */
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	/* drop this request's reference on its epoch; EV_CLEANUP if cancelled */
	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2277b411b363SPhilipp Reisner 
2278a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2279b411b363SPhilipp Reisner {
22808050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2281a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2282a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
228399920dc5SAndreas Gruenbacher 	int err;
2284b411b363SPhilipp Reisner 
2285a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2286a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2287b411b363SPhilipp Reisner 
228899920dc5SAndreas Gruenbacher 	return err;
2289b411b363SPhilipp Reisner }
2290b411b363SPhilipp Reisner 
/* Ack a conflicting peer write as superseded (two-primaries conflict
 * resolution; queued from handle_write_conflicts()). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2295b6a370baSPhilipp Reisner 
229699920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22977be8da07SAndreas Gruenbacher {
2298a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2299a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2300a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
23017be8da07SAndreas Gruenbacher 
2302a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2303d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
23047be8da07SAndreas Gruenbacher }
23057be8da07SAndreas Gruenbacher 
23063e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
23073e394da1SAndreas Gruenbacher {
23083e394da1SAndreas Gruenbacher 	/*
23093e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
23103e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
23113e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
23123e394da1SAndreas Gruenbacher 	 */
23133e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
23143e394da1SAndreas Gruenbacher }
23153e394da1SAndreas Gruenbacher 
23163e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
23173e394da1SAndreas Gruenbacher {
23183e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
23193e394da1SAndreas Gruenbacher }
23203e394da1SAndreas Gruenbacher 
/* Record a newly received peer sequence number.  Only relevant when this
 * side resolves write conflicts (RESOLVE_CONFLICTS set on the connection).
 * Wakes waiters in wait_for_and_update_peer_seq() only when this packet
 * actually advanced device->peer_seq. */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
23363e394da1SAndreas Gruenbacher 
2337d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2338d93f6302SLars Ellenberg {
2339d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2340d93f6302SLars Ellenberg }
2341d93f6302SLars Ellenberg 
/* maybe change sync_ee into interval trees as well? */
/* Linear scan of all in-flight resync writes (device->sync_ee) for one
 * overlapping this peer request; the list is protected by req_lock. */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}
2360b6a370baSPhilipp Reisner 
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence numbers only matter if we resolve write conflicts. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* Logically next packet (or an older one)?  Then advance
		 * device->peer_seq ourselves and let the caller proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* sleep until update_peer_seq() wakes us, or timeout */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2429b411b363SPhilipp Reisner 
2430688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2431688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2432688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2433bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf)
243476d2e7ecSPhilipp Reisner {
243576d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
243676d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
243728a8f0d3SMike Christie 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2438bb3cc85eSMike Christie }
2439bb3cc85eSMike Christie 
2440bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf)
2441bb3cc85eSMike Christie {
2442f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
244345c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2444f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2445f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2446f31e583aSLars Ellenberg 	if (dpf & DP_WSAME)
2447f31e583aSLars Ellenberg 		return REQ_OP_WRITE_SAME;
2448bb3cc85eSMike Christie 	else
2449bb3cc85eSMike Christie 		return REQ_OP_WRITE;
245076d2e7ecSPhilipp Reisner }
245176d2e7ecSPhilipp Reisner 
/* Fail (NEG_ACK) all postponed local writes overlapping [sector, size).
 * Called with device->resource->req_lock held; the lock is dropped while
 * completing a master bio, so the overlap scan restarts from the top each
 * time, since the tree may have changed in between. */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;	/* skip non-local (peer) requests */
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;	/* tree may have changed while unlocked */
	}
}
24767be8da07SAndreas Gruenbacher 
/* Resolve conflicts between this mirrored write and overlapping requests
 * in the write_requests interval tree (two-primaries mode).
 * Called with device->resource->req_lock held; see receive_Data().
 *
 * Returns 0 if the peer request may be submitted; -ENOENT if it was
 * superseded or queued for a retry answer (ack already queued on done_ee);
 * another negative error if waiting on a conflicting request failed.  On
 * error the request is removed from the interval tree again. */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;	/* ignore our own interval */
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Answer is sent from ack_sender context:
			 * P_SUPERSEDED or a retry request, per the
			 * containment check above. */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25847be8da07SAndreas Gruenbacher 
/* mirrored write
 * Receive a P_DATA / P_TRIM / P_ZEROES packet: read the payload into a
 * peer request, resolve write conflicts in two-primaries mode, account
 * the request in the current epoch, send the protocol B/C ack, and
 * submit the write to the local disk. */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int op, op_flags;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		int err2;

		/* No usable local disk: still keep sequence numbers and
		 * epoch accounting consistent, drain the payload, and nack. */
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	op = wire_flags_to_bio_op(dp_flags);
	op_flags = wire_flags_to_bio_flags(dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* a payload-less P_DATA must be a stand-alone flush */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Account this request in the currently open write epoch. */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		/* Peers < protocol 100 don't encode the ack mode in dp_flags;
		 * derive it from the configured wire protocol instead. */
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				/* superseded / queued for retry: already
				 * handled, not an error for the caller */
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and WRITE_SAME are processed synchronously,
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* as sync target, don't race with in-flight resync writes */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
				       DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	/* undo the epoch reference and free the request on any failure */
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2755b411b363SPhilipp Reisner 
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * @throttle_if_app_is_waiting: if true, throttle even for an extent that
 * application I/O is already waiting on (BME_PRIORITY is not checked).
 */
bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
		bool throttle_if_app_is_waiting)
{
	struct lc_element *tmp;
	bool throttle = drbd_rs_c_min_rate_throttle(device);

	/* Rate check says "don't throttle", or caller wants to throttle
	 * regardless of waiting application I/O: decision is final. */
	if (!throttle || throttle_if_app_is_waiting)
		return throttle;

	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags))
			throttle = false;
		/* Do not slow down if app IO is already waiting for this extent,
		 * and our progress is necessary for application IO to complete. */
	}
	spin_unlock_irq(&device->al_lock);

	return throttle;
}
2789e8299874SLars Ellenberg 
2790e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27910f0601f4SLars Ellenberg {
27928c40c7c4SChristoph Hellwig 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27930f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2794daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2795e8299874SLars Ellenberg 	int curr_events;
2796daeda1ccSPhilipp Reisner 
2797daeda1ccSPhilipp Reisner 	rcu_read_lock();
2798b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2799daeda1ccSPhilipp Reisner 	rcu_read_unlock();
28000f0601f4SLars Ellenberg 
28010f0601f4SLars Ellenberg 	/* feature disabled? */
2802daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2803e8299874SLars Ellenberg 		return false;
2804e3555d85SPhilipp Reisner 
28058446fe92SChristoph Hellwig 	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2806b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2807ad3fee79SLars Ellenberg 
2808ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2809ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
28100f0601f4SLars Ellenberg 		unsigned long rs_left;
28110f0601f4SLars Ellenberg 		int i;
28120f0601f4SLars Ellenberg 
2813b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
28140f0601f4SLars Ellenberg 
28150f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
28160f0601f4SLars Ellenberg 		 * approx. */
2817b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
28182649f080SLars Ellenberg 
2819b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2820b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
28212649f080SLars Ellenberg 		else
2822b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
28230f0601f4SLars Ellenberg 
2824b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
28250f0601f4SLars Ellenberg 		if (!dt)
28260f0601f4SLars Ellenberg 			dt++;
2827b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
28280f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
28290f0601f4SLars Ellenberg 
2830daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2831e8299874SLars Ellenberg 			return true;
28320f0601f4SLars Ellenberg 	}
2833e8299874SLars Ellenberg 	return false;
28340f0601f4SLars Ellenberg }
28350f0601f4SLars Ellenberg 
2836bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
2837b411b363SPhilipp Reisner {
28389f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2839b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2840b411b363SPhilipp Reisner 	sector_t sector;
28414a76b161SAndreas Gruenbacher 	sector_t capacity;
2842db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2843b411b363SPhilipp Reisner 	struct digest_info *di = NULL;
2844b18b37beSPhilipp Reisner 	int size, verb;
2845b411b363SPhilipp Reisner 	unsigned int fault_type;
2846e658983aSAndreas Gruenbacher 	struct p_block_req *p =	pi->data;
28474a76b161SAndreas Gruenbacher 
28489f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
28499f4fe9adSAndreas Gruenbacher 	if (!peer_device)
28504a76b161SAndreas Gruenbacher 		return -EIO;
28519f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2852155bd9d1SChristoph Hellwig 	capacity = get_capacity(device->vdisk);
2853b411b363SPhilipp Reisner 
2854b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2855b411b363SPhilipp Reisner 	size   = be32_to_cpu(p->blksize);
2856b411b363SPhilipp Reisner 
2857c670a398SAndreas Gruenbacher 	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2858d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2859b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
286082bc0194SAndreas Gruenbacher 		return -EINVAL;
2861b411b363SPhilipp Reisner 	}
2862b411b363SPhilipp Reisner 	if (sector + (size>>9) > capacity) {
2863d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2864b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
286582bc0194SAndreas Gruenbacher 		return -EINVAL;
2866b411b363SPhilipp Reisner 	}
2867b411b363SPhilipp Reisner 
2868b30ab791SAndreas Gruenbacher 	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
2869b18b37beSPhilipp Reisner 		verb = 1;
2870e2857216SAndreas Gruenbacher 		switch (pi->cmd) {
2871b18b37beSPhilipp Reisner 		case P_DATA_REQUEST:
287269a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2873b18b37beSPhilipp Reisner 			break;
2874700ca8c0SPhilipp Reisner 		case P_RS_THIN_REQ:
2875b18b37beSPhilipp Reisner 		case P_RS_DATA_REQUEST:
2876b18b37beSPhilipp Reisner 		case P_CSUM_RS_REQUEST:
2877b18b37beSPhilipp Reisner 		case P_OV_REQUEST:
287869a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
2879b18b37beSPhilipp Reisner 			break;
2880b18b37beSPhilipp Reisner 		case P_OV_REPLY:
2881b18b37beSPhilipp Reisner 			verb = 0;
2882b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
288369a22773SAndreas Gruenbacher 			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2884b18b37beSPhilipp Reisner 			break;
2885b18b37beSPhilipp Reisner 		default:
288649ba9b1bSAndreas Gruenbacher 			BUG();
2887b18b37beSPhilipp Reisner 		}
2888b18b37beSPhilipp Reisner 		if (verb && __ratelimit(&drbd_ratelimit_state))
2889d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not satisfy peer's read request, "
2890b411b363SPhilipp Reisner 			    "no local data.\n");
2891b18b37beSPhilipp Reisner 
2892a821cc4aSLars Ellenberg 		/* drain possibly payload */
289369a22773SAndreas Gruenbacher 		return drbd_drain_block(peer_device, pi->size);
2894b411b363SPhilipp Reisner 	}
2895b411b363SPhilipp Reisner 
2896b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2897b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
2898b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
2899a0fb3c47SLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
29009104d31aSLars Ellenberg 			size, GFP_NOIO);
2901db830c46SAndreas Gruenbacher 	if (!peer_req) {
2902b30ab791SAndreas Gruenbacher 		put_ldev(device);
290382bc0194SAndreas Gruenbacher 		return -ENOMEM;
2904b411b363SPhilipp Reisner 	}
2905b411b363SPhilipp Reisner 
2906e2857216SAndreas Gruenbacher 	switch (pi->cmd) {
2907b411b363SPhilipp Reisner 	case P_DATA_REQUEST:
2908a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_data_req;
2909b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_DT_RD;
291080a40e43SLars Ellenberg 		/* application IO, don't drbd_rs_begin_io */
291121ae5d7fSLars Ellenberg 		peer_req->flags |= EE_APPLICATION;
291280a40e43SLars Ellenberg 		goto submit;
291380a40e43SLars Ellenberg 
2914700ca8c0SPhilipp Reisner 	case P_RS_THIN_REQ:
2915700ca8c0SPhilipp Reisner 		/* If at some point in the future we have a smart way to
2916700ca8c0SPhilipp Reisner 		   find out if this data block is completely deallocated,
2917700ca8c0SPhilipp Reisner 		   then we would do something smarter here than reading
2918700ca8c0SPhilipp Reisner 		   the block... */
2919700ca8c0SPhilipp Reisner 		peer_req->flags |= EE_RS_THIN_REQ;
2920df561f66SGustavo A. R. Silva 		fallthrough;
2921b411b363SPhilipp Reisner 	case P_RS_DATA_REQUEST:
2922a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_rsdata_req;
2923b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
29245f9915bbSLars Ellenberg 		/* used in the sector offset progress display */
2925b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2926b411b363SPhilipp Reisner 		break;
2927b411b363SPhilipp Reisner 
2928b411b363SPhilipp Reisner 	case P_OV_REPLY:
2929b411b363SPhilipp Reisner 	case P_CSUM_RS_REQUEST:
2930b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2931e2857216SAndreas Gruenbacher 		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2932b411b363SPhilipp Reisner 		if (!di)
2933b411b363SPhilipp Reisner 			goto out_free_e;
2934b411b363SPhilipp Reisner 
2935e2857216SAndreas Gruenbacher 		di->digest_size = pi->size;
2936b411b363SPhilipp Reisner 		di->digest = (((char *)di)+sizeof(struct digest_info));
2937b411b363SPhilipp Reisner 
2938db830c46SAndreas Gruenbacher 		peer_req->digest = di;
2939db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_HAS_DIGEST;
2940c36c3cedSLars Ellenberg 
29419f4fe9adSAndreas Gruenbacher 		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2942b411b363SPhilipp Reisner 			goto out_free_e;
2943b411b363SPhilipp Reisner 
2944e2857216SAndreas Gruenbacher 		if (pi->cmd == P_CSUM_RS_REQUEST) {
29459f4fe9adSAndreas Gruenbacher 			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2946a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_csum_rs_req;
29475f9915bbSLars Ellenberg 			/* used in the sector offset progress display */
2948b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2949aaaba345SLars Ellenberg 			/* remember to report stats in drbd_resync_finished */
2950aaaba345SLars Ellenberg 			device->use_csums = true;
2951e2857216SAndreas Gruenbacher 		} else if (pi->cmd == P_OV_REPLY) {
29522649f080SLars Ellenberg 			/* track progress, we may need to throttle */
2953b30ab791SAndreas Gruenbacher 			atomic_add(size >> 9, &device->rs_sect_in);
2954a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_ov_reply;
2955b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
29560f0601f4SLars Ellenberg 			/* drbd_rs_begin_io done when we sent this request,
29570f0601f4SLars Ellenberg 			 * but accounting still needs to be done. */
29580f0601f4SLars Ellenberg 			goto submit_for_resync;
2959b411b363SPhilipp Reisner 		}
2960b411b363SPhilipp Reisner 		break;
2961b411b363SPhilipp Reisner 
2962b411b363SPhilipp Reisner 	case P_OV_REQUEST:
2963b30ab791SAndreas Gruenbacher 		if (device->ov_start_sector == ~(sector_t)0 &&
29649f4fe9adSAndreas Gruenbacher 		    peer_device->connection->agreed_pro_version >= 90) {
2965de228bbaSLars Ellenberg 			unsigned long now = jiffies;
2966de228bbaSLars Ellenberg 			int i;
2967b30ab791SAndreas Gruenbacher 			device->ov_start_sector = sector;
2968b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
2969b30ab791SAndreas Gruenbacher 			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2970b30ab791SAndreas Gruenbacher 			device->rs_total = device->ov_left;
2971de228bbaSLars Ellenberg 			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2972b30ab791SAndreas Gruenbacher 				device->rs_mark_left[i] = device->ov_left;
2973b30ab791SAndreas Gruenbacher 				device->rs_mark_time[i] = now;
2974de228bbaSLars Ellenberg 			}
2975d0180171SAndreas Gruenbacher 			drbd_info(device, "Online Verify start sector: %llu\n",
2976b411b363SPhilipp Reisner 					(unsigned long long)sector);
2977b411b363SPhilipp Reisner 		}
2978a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_ov_req;
2979b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2980b411b363SPhilipp Reisner 		break;
2981b411b363SPhilipp Reisner 
2982b411b363SPhilipp Reisner 	default:
298349ba9b1bSAndreas Gruenbacher 		BUG();
2984b411b363SPhilipp Reisner 	}
2985b411b363SPhilipp Reisner 
29860f0601f4SLars Ellenberg 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
29870f0601f4SLars Ellenberg 	 * wrt the receiver, but it is not as straightforward as it may seem.
29880f0601f4SLars Ellenberg 	 * Various places in the resync start and stop logic assume resync
29890f0601f4SLars Ellenberg 	 * requests are processed in order, requeuing this on the worker thread
29900f0601f4SLars Ellenberg 	 * introduces a bunch of new code for synchronization between threads.
29910f0601f4SLars Ellenberg 	 *
29920f0601f4SLars Ellenberg 	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
29930f0601f4SLars Ellenberg 	 * "forever", throttling after drbd_rs_begin_io will lock that extent
29940f0601f4SLars Ellenberg 	 * for application writes for the same time.  For now, just throttle
29950f0601f4SLars Ellenberg 	 * here, where the rest of the code expects the receiver to sleep for
29960f0601f4SLars Ellenberg 	 * a while, anyways.
29970f0601f4SLars Ellenberg 	 */
2998b411b363SPhilipp Reisner 
29990f0601f4SLars Ellenberg 	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
30000f0601f4SLars Ellenberg 	 * this defers syncer requests for some time, before letting at least
30010f0601f4SLars Ellenberg 	 * on request through.  The resync controller on the receiving side
30020f0601f4SLars Ellenberg 	 * will adapt to the incoming rate accordingly.
30030f0601f4SLars Ellenberg 	 *
30040f0601f4SLars Ellenberg 	 * We cannot throttle here if remote is Primary/SyncTarget:
30050f0601f4SLars Ellenberg 	 * we would also throttle its application reads.
30060f0601f4SLars Ellenberg 	 * In that case, throttling is done on the SyncTarget only.
30070f0601f4SLars Ellenberg 	 */
3008c5a2c150SLars Ellenberg 
3009c5a2c150SLars Ellenberg 	/* Even though this may be a resync request, we do add to "read_ee";
3010c5a2c150SLars Ellenberg 	 * "sync_ee" is only used for resync WRITEs.
3011c5a2c150SLars Ellenberg 	 * Add to list early, so debugfs can find this request
3012c5a2c150SLars Ellenberg 	 * even if we have to sleep below. */
3013c5a2c150SLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
3014c5a2c150SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
3015c5a2c150SLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
3016c5a2c150SLars Ellenberg 
3017944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
3018ad3fee79SLars Ellenberg 	if (device->state.peer != R_PRIMARY
3019ad3fee79SLars Ellenberg 	&& drbd_rs_should_slow_down(device, sector, false))
3020e3555d85SPhilipp Reisner 		schedule_timeout_uninterruptible(HZ/10);
3021944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_begin_io);
3022b30ab791SAndreas Gruenbacher 	if (drbd_rs_begin_io(device, sector))
302380a40e43SLars Ellenberg 		goto out_free_e;
3024b411b363SPhilipp Reisner 
30250f0601f4SLars Ellenberg submit_for_resync:
3026b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
30270f0601f4SLars Ellenberg 
302880a40e43SLars Ellenberg submit:
3029944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_submit_peer_request);
3030b30ab791SAndreas Gruenbacher 	inc_unacked(device);
3031bb3cc85eSMike Christie 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
3032bb3cc85eSMike Christie 				     fault_type) == 0)
303382bc0194SAndreas Gruenbacher 		return 0;
3034b411b363SPhilipp Reisner 
303510f6d992SLars Ellenberg 	/* don't care for the reason here */
3036d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
3037c5a2c150SLars Ellenberg 
3038c5a2c150SLars Ellenberg out_free_e:
30390500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
3040a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
30410500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
304222cc37a9SLars Ellenberg 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
304322cc37a9SLars Ellenberg 
3044b30ab791SAndreas Gruenbacher 	put_ldev(device);
3045b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
304682bc0194SAndreas Gruenbacher 	return -EIO;
3047b411b363SPhilipp Reisner }
3048b411b363SPhilipp Reisner 
30499b48ff07SLee Jones /*
305069a22773SAndreas Gruenbacher  * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
305169a22773SAndreas Gruenbacher  */
305269a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
3053b411b363SPhilipp Reisner {
305469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3055b411b363SPhilipp Reisner 	int self, peer, rv = -100;
3056b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
305744ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
3058b411b363SPhilipp Reisner 
3059b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & 1;
3060b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & 1;
3061b411b363SPhilipp Reisner 
3062b30ab791SAndreas Gruenbacher 	ch_peer = device->p_uuid[UI_SIZE];
3063b30ab791SAndreas Gruenbacher 	ch_self = device->comm_bm_set;
3064b411b363SPhilipp Reisner 
306544ed167dSPhilipp Reisner 	rcu_read_lock();
306669a22773SAndreas Gruenbacher 	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
306744ed167dSPhilipp Reisner 	rcu_read_unlock();
306844ed167dSPhilipp Reisner 	switch (after_sb_0p) {
3069b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3070b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3071b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
307244ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
3073d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3074b411b363SPhilipp Reisner 		break;
3075b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3076b411b363SPhilipp Reisner 		break;
3077b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3078b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3079b411b363SPhilipp Reisner 			rv = -1;
3080b411b363SPhilipp Reisner 			break;
3081b411b363SPhilipp Reisner 		}
3082b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3083b411b363SPhilipp Reisner 			rv =  1;
3084b411b363SPhilipp Reisner 			break;
3085b411b363SPhilipp Reisner 		}
3086df561f66SGustavo A. R. Silva 		fallthrough;	/* to one of the other strategies */
3087b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3088b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3089b411b363SPhilipp Reisner 			rv = 1;
3090b411b363SPhilipp Reisner 			break;
3091b411b363SPhilipp Reisner 		}
3092b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3093b411b363SPhilipp Reisner 			rv = -1;
3094b411b363SPhilipp Reisner 			break;
3095b411b363SPhilipp Reisner 		}
3096b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
3097d0180171SAndreas Gruenbacher 		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
3098b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
3099df561f66SGustavo A. R. Silva 		fallthrough;
3100b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3101b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
310269a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3103b411b363SPhilipp Reisner 				? -1 : 1;
3104b411b363SPhilipp Reisner 			break;
3105b411b363SPhilipp Reisner 		} else {
3106b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
3107b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
3108b411b363SPhilipp Reisner 		}
310944ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
3110b411b363SPhilipp Reisner 			break;
3111df561f66SGustavo A. R. Silva 		fallthrough;
3112b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3113b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
3114b411b363SPhilipp Reisner 			rv = -1;
3115b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
3116b411b363SPhilipp Reisner 			rv =  1;
3117b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
3118b411b363SPhilipp Reisner 		     /* Well, then use something else. */
311969a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3120b411b363SPhilipp Reisner 				? -1 : 1;
3121b411b363SPhilipp Reisner 		break;
3122b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3123b411b363SPhilipp Reisner 		rv = -1;
3124b411b363SPhilipp Reisner 		break;
3125b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3126b411b363SPhilipp Reisner 		rv =  1;
3127b411b363SPhilipp Reisner 	}
3128b411b363SPhilipp Reisner 
3129b411b363SPhilipp Reisner 	return rv;
3130b411b363SPhilipp Reisner }
3131b411b363SPhilipp Reisner 
31329b48ff07SLee Jones /*
313369a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
313469a22773SAndreas Gruenbacher  */
313569a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3136b411b363SPhilipp Reisner {
313769a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31386184ea21SAndreas Gruenbacher 	int hg, rv = -100;
313944ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3140b411b363SPhilipp Reisner 
314144ed167dSPhilipp Reisner 	rcu_read_lock();
314269a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
314344ed167dSPhilipp Reisner 	rcu_read_unlock();
314444ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3145b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3146b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3147b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3148b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3149b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
315044ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3151d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3152b411b363SPhilipp Reisner 		break;
3153b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3154b411b363SPhilipp Reisner 		break;
3155b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
315669a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3157b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3158b411b363SPhilipp Reisner 			rv = hg;
3159b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3160b411b363SPhilipp Reisner 			rv = hg;
3161b411b363SPhilipp Reisner 		break;
3162b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
316369a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3164b411b363SPhilipp Reisner 		break;
3165b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3166b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3167b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
316869a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3169b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3170bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3171bb437946SAndreas Gruenbacher 
3172b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3173b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3174b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3175b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3176bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3177b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3178b411b363SPhilipp Reisner 			} else {
3179d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3180b411b363SPhilipp Reisner 				rv = hg;
3181b411b363SPhilipp Reisner 			}
3182b411b363SPhilipp Reisner 		} else
3183b411b363SPhilipp Reisner 			rv = hg;
3184b411b363SPhilipp Reisner 	}
3185b411b363SPhilipp Reisner 
3186b411b363SPhilipp Reisner 	return rv;
3187b411b363SPhilipp Reisner }
3188b411b363SPhilipp Reisner 
31899b48ff07SLee Jones /*
319069a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
319169a22773SAndreas Gruenbacher  */
319269a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3193b411b363SPhilipp Reisner {
319469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31956184ea21SAndreas Gruenbacher 	int hg, rv = -100;
319644ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3197b411b363SPhilipp Reisner 
319844ed167dSPhilipp Reisner 	rcu_read_lock();
319969a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
320044ed167dSPhilipp Reisner 	rcu_read_unlock();
320144ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3202b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3203b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3204b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3205b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3206b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3207b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3208b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
320944ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3210d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3211b411b363SPhilipp Reisner 		break;
3212b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
321369a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3214b411b363SPhilipp Reisner 		break;
3215b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3216b411b363SPhilipp Reisner 		break;
3217b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
321869a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3219b411b363SPhilipp Reisner 		if (hg == -1) {
3220bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3221bb437946SAndreas Gruenbacher 
3222b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3223b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3224b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3225b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3226bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3227b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3228b411b363SPhilipp Reisner 			} else {
3229d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3230b411b363SPhilipp Reisner 				rv = hg;
3231b411b363SPhilipp Reisner 			}
3232b411b363SPhilipp Reisner 		} else
3233b411b363SPhilipp Reisner 			rv = hg;
3234b411b363SPhilipp Reisner 	}
3235b411b363SPhilipp Reisner 
3236b411b363SPhilipp Reisner 	return rv;
3237b411b363SPhilipp Reisner }
3238b411b363SPhilipp Reisner 
3239b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3240b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3241b411b363SPhilipp Reisner {
3242b411b363SPhilipp Reisner 	if (!uuid) {
3243d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3244b411b363SPhilipp Reisner 		return;
3245b411b363SPhilipp Reisner 	}
3246d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3247b411b363SPhilipp Reisner 	     text,
3248b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3249b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3250b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3251b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3252b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3253b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3254b411b363SPhilipp Reisner }
3255b411b363SPhilipp Reisner 
3256b411b363SPhilipp Reisner /*
3257b411b363SPhilipp Reisner   100	after split brain try auto recover
3258b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3259b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3260b411b363SPhilipp Reisner     0	no Sync
3261b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3262b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3263b411b363SPhilipp Reisner  -100	after split brain, disconnect
3264b411b363SPhilipp Reisner -1000	unrelated data
32654a23f264SPhilipp Reisner -1091   requires proto 91
32664a23f264SPhilipp Reisner -1096   requires proto 96
3267b411b363SPhilipp Reisner  */
3268f2d3d75bSLars Ellenberg 
3269f2d3d75bSLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3270b411b363SPhilipp Reisner {
327144a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
327244a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
3273b411b363SPhilipp Reisner 	u64 self, peer;
3274b411b363SPhilipp Reisner 	int i, j;
3275b411b363SPhilipp Reisner 
3276b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3277b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3278b411b363SPhilipp Reisner 
3279b411b363SPhilipp Reisner 	*rule_nr = 10;
3280b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3281b411b363SPhilipp Reisner 		return 0;
3282b411b363SPhilipp Reisner 
3283b411b363SPhilipp Reisner 	*rule_nr = 20;
3284b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3285b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
3286b411b363SPhilipp Reisner 		return -2;
3287b411b363SPhilipp Reisner 
3288b411b363SPhilipp Reisner 	*rule_nr = 30;
3289b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
3290b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
3291b411b363SPhilipp Reisner 		return 2;
3292b411b363SPhilipp Reisner 
3293b411b363SPhilipp Reisner 	if (self == peer) {
3294b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
3295b411b363SPhilipp Reisner 
3296b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3297b411b363SPhilipp Reisner 
329844a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32994a23f264SPhilipp Reisner 				return -1091;
3300b411b363SPhilipp Reisner 
3301b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3302b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3303d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3304b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3305b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3306b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3307b411b363SPhilipp Reisner 
3308b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3309b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3310b411b363SPhilipp Reisner 				*rule_nr = 34;
3311b411b363SPhilipp Reisner 			} else {
3312d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3313b411b363SPhilipp Reisner 				*rule_nr = 36;
3314b411b363SPhilipp Reisner 			}
3315b411b363SPhilipp Reisner 
3316b411b363SPhilipp Reisner 			return 1;
3317b411b363SPhilipp Reisner 		}
3318b411b363SPhilipp Reisner 
3319b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3320b411b363SPhilipp Reisner 
332144a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33224a23f264SPhilipp Reisner 				return -1091;
3323b411b363SPhilipp Reisner 
3324b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3325b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3326d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3327b411b363SPhilipp Reisner 
3328b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3329b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3330b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3331b411b363SPhilipp Reisner 
3332b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3333b411b363SPhilipp Reisner 				*rule_nr = 35;
3334b411b363SPhilipp Reisner 			} else {
3335d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3336b411b363SPhilipp Reisner 				*rule_nr = 37;
3337b411b363SPhilipp Reisner 			}
3338b411b363SPhilipp Reisner 
3339b411b363SPhilipp Reisner 			return -1;
3340b411b363SPhilipp Reisner 		}
3341b411b363SPhilipp Reisner 
3342b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3343b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3344b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3345b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3346b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3347b411b363SPhilipp Reisner 		*rule_nr = 40;
3348b411b363SPhilipp Reisner 
3349f2d3d75bSLars Ellenberg 		/* Neither has the "crashed primary" flag set,
3350f2d3d75bSLars Ellenberg 		 * only a replication link hickup. */
3351f2d3d75bSLars Ellenberg 		if (rct == 0)
3352f2d3d75bSLars Ellenberg 			return 0;
3353f2d3d75bSLars Ellenberg 
3354f2d3d75bSLars Ellenberg 		/* Current UUID equal and no bitmap uuid; does not necessarily
3355f2d3d75bSLars Ellenberg 		 * mean this was a "simultaneous hard crash", maybe IO was
3356f2d3d75bSLars Ellenberg 		 * frozen, so no UUID-bump happened.
3357f2d3d75bSLars Ellenberg 		 * This is a protocol change, overload DRBD_FF_WSAME as flag
3358f2d3d75bSLars Ellenberg 		 * for "new-enough" peer DRBD version. */
3359f2d3d75bSLars Ellenberg 		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3360f2d3d75bSLars Ellenberg 			*rule_nr = 41;
3361f2d3d75bSLars Ellenberg 			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3362f2d3d75bSLars Ellenberg 				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3363f2d3d75bSLars Ellenberg 				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3364f2d3d75bSLars Ellenberg 			}
3365f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3366f2d3d75bSLars Ellenberg 				/* At least one has the "crashed primary" bit set,
3367f2d3d75bSLars Ellenberg 				 * both are primary now, but neither has rotated its UUIDs?
3368f2d3d75bSLars Ellenberg 				 * "Can not happen." */
3369f2d3d75bSLars Ellenberg 				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3370f2d3d75bSLars Ellenberg 				return -100;
3371f2d3d75bSLars Ellenberg 			}
3372f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY)
3373f2d3d75bSLars Ellenberg 				return 1;
3374f2d3d75bSLars Ellenberg 			return -1;
3375f2d3d75bSLars Ellenberg 		}
3376f2d3d75bSLars Ellenberg 
3377f2d3d75bSLars Ellenberg 		/* Both are secondary.
3378f2d3d75bSLars Ellenberg 		 * Really looks like recovery from simultaneous hard crash.
3379f2d3d75bSLars Ellenberg 		 * Check which had been primary before, and arbitrate. */
3380b411b363SPhilipp Reisner 		switch (rct) {
3381f2d3d75bSLars Ellenberg 		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3382b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3383b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3384b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
338544a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3386b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3387b411b363SPhilipp Reisner 		}
3388b411b363SPhilipp Reisner 	}
3389b411b363SPhilipp Reisner 
3390b411b363SPhilipp Reisner 	*rule_nr = 50;
3391b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3392b411b363SPhilipp Reisner 	if (self == peer)
3393b411b363SPhilipp Reisner 		return -1;
3394b411b363SPhilipp Reisner 
3395b411b363SPhilipp Reisner 	*rule_nr = 51;
3396b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3397b411b363SPhilipp Reisner 	if (self == peer) {
339844a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3399b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3400b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3401b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3402b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3403b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3404b411b363SPhilipp Reisner 
340544a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
34064a23f264SPhilipp Reisner 				return -1091;
3407b411b363SPhilipp Reisner 
3408b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3409b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
34104a23f264SPhilipp Reisner 
3411d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3412b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
34134a23f264SPhilipp Reisner 
3414b411b363SPhilipp Reisner 			return -1;
3415b411b363SPhilipp Reisner 		}
3416b411b363SPhilipp Reisner 	}
3417b411b363SPhilipp Reisner 
3418b411b363SPhilipp Reisner 	*rule_nr = 60;
3419b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3420b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3421b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3422b411b363SPhilipp Reisner 		if (self == peer)
3423b411b363SPhilipp Reisner 			return -2;
3424b411b363SPhilipp Reisner 	}
3425b411b363SPhilipp Reisner 
3426b411b363SPhilipp Reisner 	*rule_nr = 70;
3427b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3428b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3429b411b363SPhilipp Reisner 	if (self == peer)
3430b411b363SPhilipp Reisner 		return 1;
3431b411b363SPhilipp Reisner 
3432b411b363SPhilipp Reisner 	*rule_nr = 71;
3433b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3434b411b363SPhilipp Reisner 	if (self == peer) {
343544a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3436b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3437b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3438b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3439b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3440b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3441b411b363SPhilipp Reisner 
344244a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
34434a23f264SPhilipp Reisner 				return -1091;
3444b411b363SPhilipp Reisner 
3445b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3446b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3447b411b363SPhilipp Reisner 
3448d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3449b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3450b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3451b411b363SPhilipp Reisner 
3452b411b363SPhilipp Reisner 			return 1;
3453b411b363SPhilipp Reisner 		}
3454b411b363SPhilipp Reisner 	}
3455b411b363SPhilipp Reisner 
3456b411b363SPhilipp Reisner 
3457b411b363SPhilipp Reisner 	*rule_nr = 80;
3458b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3459b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3460b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3461b411b363SPhilipp Reisner 		if (self == peer)
3462b411b363SPhilipp Reisner 			return 2;
3463b411b363SPhilipp Reisner 	}
3464b411b363SPhilipp Reisner 
3465b411b363SPhilipp Reisner 	*rule_nr = 90;
3466b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3467b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3468b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3469b411b363SPhilipp Reisner 		return 100;
3470b411b363SPhilipp Reisner 
3471b411b363SPhilipp Reisner 	*rule_nr = 100;
3472b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3473b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3474b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3475b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3476b411b363SPhilipp Reisner 			if (self == peer)
3477b411b363SPhilipp Reisner 				return -100;
3478b411b363SPhilipp Reisner 		}
3479b411b363SPhilipp Reisner 	}
3480b411b363SPhilipp Reisner 
3481b411b363SPhilipp Reisner 	return -1000;
3482b411b363SPhilipp Reisner }
3483b411b363SPhilipp Reisner 
3484b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3485b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3486b411b363SPhilipp Reisner  */
348769a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
348869a22773SAndreas Gruenbacher 					   enum drbd_role peer_role,
3489b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
3490b411b363SPhilipp Reisner {
349169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3492b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
3493b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
349444ed167dSPhilipp Reisner 	struct net_conf *nc;
3495d29e89e3SRoland Kammerer 	int hg, rule_nr, rr_conflict, tentative, always_asbp;
3496b411b363SPhilipp Reisner 
3497b30ab791SAndreas Gruenbacher 	mydisk = device->state.disk;
3498b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
3499b30ab791SAndreas Gruenbacher 		mydisk = device->new_state_tmp.disk;
3500b411b363SPhilipp Reisner 
3501d0180171SAndreas Gruenbacher 	drbd_info(device, "drbd_sync_handshake:\n");
35029f2247bbSPhilipp Reisner 
3503b30ab791SAndreas Gruenbacher 	spin_lock_irq(&device->ldev->md.uuid_lock);
3504b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3505b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "peer", device->p_uuid,
3506b30ab791SAndreas Gruenbacher 		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3507b411b363SPhilipp Reisner 
3508f2d3d75bSLars Ellenberg 	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
3509b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->ldev->md.uuid_lock);
3510b411b363SPhilipp Reisner 
3511d0180171SAndreas Gruenbacher 	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3512b411b363SPhilipp Reisner 
3513b411b363SPhilipp Reisner 	if (hg == -1000) {
3514d0180171SAndreas Gruenbacher 		drbd_alert(device, "Unrelated data, aborting!\n");
3515b411b363SPhilipp Reisner 		return C_MASK;
3516b411b363SPhilipp Reisner 	}
3517f2d3d75bSLars Ellenberg 	if (hg < -0x10000) {
3518f2d3d75bSLars Ellenberg 		int proto, fflags;
3519f2d3d75bSLars Ellenberg 		hg = -hg;
3520f2d3d75bSLars Ellenberg 		proto = hg & 0xff;
3521f2d3d75bSLars Ellenberg 		fflags = (hg >> 8) & 0xff;
3522f2d3d75bSLars Ellenberg 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3523f2d3d75bSLars Ellenberg 					proto, fflags);
3524f2d3d75bSLars Ellenberg 		return C_MASK;
3525f2d3d75bSLars Ellenberg 	}
35264a23f264SPhilipp Reisner 	if (hg < -1000) {
3527d0180171SAndreas Gruenbacher 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3528b411b363SPhilipp Reisner 		return C_MASK;
3529b411b363SPhilipp Reisner 	}
3530b411b363SPhilipp Reisner 
3531b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3532b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
3533b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
3534b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
3535b411b363SPhilipp Reisner 		if (f)
3536b411b363SPhilipp Reisner 			hg = hg*2;
3537d0180171SAndreas Gruenbacher 		drbd_info(device, "Becoming sync %s due to disk states.\n",
3538b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
3539b411b363SPhilipp Reisner 	}
3540b411b363SPhilipp Reisner 
35413a11a487SAdam Gandelman 	if (abs(hg) == 100)
3542b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "initial-split-brain");
35433a11a487SAdam Gandelman 
354444ed167dSPhilipp Reisner 	rcu_read_lock();
354569a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
3546d29e89e3SRoland Kammerer 	always_asbp = nc->always_asbp;
3547d29e89e3SRoland Kammerer 	rr_conflict = nc->rr_conflict;
3548d29e89e3SRoland Kammerer 	tentative = nc->tentative;
3549d29e89e3SRoland Kammerer 	rcu_read_unlock();
355044ed167dSPhilipp Reisner 
3551d29e89e3SRoland Kammerer 	if (hg == 100 || (hg == -100 && always_asbp)) {
3552b30ab791SAndreas Gruenbacher 		int pcount = (device->state.role == R_PRIMARY)
3553b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
3554b411b363SPhilipp Reisner 		int forced = (hg == -100);
3555b411b363SPhilipp Reisner 
3556b411b363SPhilipp Reisner 		switch (pcount) {
3557b411b363SPhilipp Reisner 		case 0:
355869a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_0p(peer_device);
3559b411b363SPhilipp Reisner 			break;
3560b411b363SPhilipp Reisner 		case 1:
356169a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_1p(peer_device);
3562b411b363SPhilipp Reisner 			break;
3563b411b363SPhilipp Reisner 		case 2:
356469a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_2p(peer_device);
3565b411b363SPhilipp Reisner 			break;
3566b411b363SPhilipp Reisner 		}
3567b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
3568d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, %d primaries, "
3569b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3570b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3571b411b363SPhilipp Reisner 			if (forced) {
3572d0180171SAndreas Gruenbacher 				drbd_warn(device, "Doing a full sync, since"
3573b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3574b411b363SPhilipp Reisner 				hg = hg*2;
3575b411b363SPhilipp Reisner 			}
3576b411b363SPhilipp Reisner 		}
3577b411b363SPhilipp Reisner 	}
3578b411b363SPhilipp Reisner 
3579b411b363SPhilipp Reisner 	if (hg == -100) {
3580b30ab791SAndreas Gruenbacher 		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3581b411b363SPhilipp Reisner 			hg = -1;
3582b30ab791SAndreas Gruenbacher 		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3583b411b363SPhilipp Reisner 			hg = 1;
3584b411b363SPhilipp Reisner 
3585b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3586d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, manually solved. "
3587b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3588b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3589b411b363SPhilipp Reisner 	}
3590b411b363SPhilipp Reisner 
3591b411b363SPhilipp Reisner 	if (hg == -100) {
3592580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3593580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3594580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3595580b9767SLars Ellenberg 		 * to that disk, in a way... */
3596d0180171SAndreas Gruenbacher 		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3597b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "split-brain");
3598b411b363SPhilipp Reisner 		return C_MASK;
3599b411b363SPhilipp Reisner 	}
3600b411b363SPhilipp Reisner 
3601b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3602d0180171SAndreas Gruenbacher 		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3603b411b363SPhilipp Reisner 		return C_MASK;
3604b411b363SPhilipp Reisner 	}
3605b411b363SPhilipp Reisner 
3606b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3607b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
360844ed167dSPhilipp Reisner 		switch (rr_conflict) {
3609b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3610b30ab791SAndreas Gruenbacher 			drbd_khelper(device, "pri-lost");
3611df561f66SGustavo A. R. Silva 			fallthrough;
3612b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3613d0180171SAndreas Gruenbacher 			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3614b411b363SPhilipp Reisner 			return C_MASK;
3615b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
3616d0180171SAndreas Gruenbacher 			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3617b411b363SPhilipp Reisner 			     "assumption\n");
3618b411b363SPhilipp Reisner 		}
3619b411b363SPhilipp Reisner 	}
3620b411b363SPhilipp Reisner 
362169a22773SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3622cf14c2e9SPhilipp Reisner 		if (hg == 0)
3623d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3624cf14c2e9SPhilipp Reisner 		else
3625d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3626cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3627cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3628cf14c2e9SPhilipp Reisner 		return C_MASK;
3629cf14c2e9SPhilipp Reisner 	}
3630cf14c2e9SPhilipp Reisner 
3631b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3632d0180171SAndreas Gruenbacher 		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3633b30ab791SAndreas Gruenbacher 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
363420ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3635b411b363SPhilipp Reisner 			return C_MASK;
3636b411b363SPhilipp Reisner 	}
3637b411b363SPhilipp Reisner 
3638b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3639b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3640b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3641b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3642b411b363SPhilipp Reisner 	} else {
3643b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3644b30ab791SAndreas Gruenbacher 		if (drbd_bm_total_weight(device)) {
3645d0180171SAndreas Gruenbacher 			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3646b30ab791SAndreas Gruenbacher 			     drbd_bm_total_weight(device));
3647b411b363SPhilipp Reisner 		}
3648b411b363SPhilipp Reisner 	}
3649b411b363SPhilipp Reisner 
3650b411b363SPhilipp Reisner 	return rv;
3651b411b363SPhilipp Reisner }
3652b411b363SPhilipp Reisner 
3653f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3654b411b363SPhilipp Reisner {
3655b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3656f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3657f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3658b411b363SPhilipp Reisner 
3659b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3660f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3661f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3662b411b363SPhilipp Reisner 
3663b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3664f179d76dSPhilipp Reisner 	return peer;
3665b411b363SPhilipp Reisner }
3666b411b363SPhilipp Reisner 
3667bde89a9eSAndreas Gruenbacher static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
3668b411b363SPhilipp Reisner {
3669e658983aSAndreas Gruenbacher 	struct p_protocol *p = pi->data;
3670036b17eaSPhilipp Reisner 	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3671036b17eaSPhilipp Reisner 	int p_proto, p_discard_my_data, p_two_primaries, cf;
3672036b17eaSPhilipp Reisner 	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3673036b17eaSPhilipp Reisner 	char integrity_alg[SHARED_SECRET_MAX] = "";
36743d0e6375SKees Cook 	struct crypto_shash *peer_integrity_tfm = NULL;
36757aca6c75SPhilipp Reisner 	void *int_dig_in = NULL, *int_dig_vv = NULL;
3676b411b363SPhilipp Reisner 
3677b411b363SPhilipp Reisner 	p_proto		= be32_to_cpu(p->protocol);
3678b411b363SPhilipp Reisner 	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
3679b411b363SPhilipp Reisner 	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
3680b411b363SPhilipp Reisner 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
3681b411b363SPhilipp Reisner 	p_two_primaries = be32_to_cpu(p->two_primaries);
3682cf14c2e9SPhilipp Reisner 	cf		= be32_to_cpu(p->conn_flags);
36836139f60dSAndreas Gruenbacher 	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
3684cf14c2e9SPhilipp Reisner 
3685bde89a9eSAndreas Gruenbacher 	if (connection->agreed_pro_version >= 87) {
368686db0618SAndreas Gruenbacher 		int err;
368786db0618SAndreas Gruenbacher 
368888104ca4SAndreas Gruenbacher 		if (pi->size > sizeof(integrity_alg))
368986db0618SAndreas Gruenbacher 			return -EIO;
3690bde89a9eSAndreas Gruenbacher 		err = drbd_recv_all(connection, integrity_alg, pi->size);
369186db0618SAndreas Gruenbacher 		if (err)
369286db0618SAndreas Gruenbacher 			return err;
369388104ca4SAndreas Gruenbacher 		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3694036b17eaSPhilipp Reisner 	}
369586db0618SAndreas Gruenbacher 
36967d4c782cSAndreas Gruenbacher 	if (pi->cmd != P_PROTOCOL_UPDATE) {
3697bde89a9eSAndreas Gruenbacher 		clear_bit(CONN_DRY_RUN, &connection->flags);
3698cf14c2e9SPhilipp Reisner 
3699cf14c2e9SPhilipp Reisner 		if (cf & CF_DRY_RUN)
3700bde89a9eSAndreas Gruenbacher 			set_bit(CONN_DRY_RUN, &connection->flags);
3701b411b363SPhilipp Reisner 
370244ed167dSPhilipp Reisner 		rcu_read_lock();
3703bde89a9eSAndreas Gruenbacher 		nc = rcu_dereference(connection->net_conf);
370444ed167dSPhilipp Reisner 
3705036b17eaSPhilipp Reisner 		if (p_proto != nc->wire_protocol) {
37061ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "protocol");
370744ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3708b411b363SPhilipp Reisner 		}
3709b411b363SPhilipp Reisner 
3710f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
37111ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
371244ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3713b411b363SPhilipp Reisner 		}
3714b411b363SPhilipp Reisner 
3715f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
37161ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
371744ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3718b411b363SPhilipp Reisner 		}
3719b411b363SPhilipp Reisner 
3720f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
37211ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
372244ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3723b411b363SPhilipp Reisner 		}
3724b411b363SPhilipp Reisner 
37256139f60dSAndreas Gruenbacher 		if (p_discard_my_data && nc->discard_my_data) {
37261ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
372744ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3728b411b363SPhilipp Reisner 		}
3729b411b363SPhilipp Reisner 
373044ed167dSPhilipp Reisner 		if (p_two_primaries != nc->two_primaries) {
37311ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
373244ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3733b411b363SPhilipp Reisner 		}
3734b411b363SPhilipp Reisner 
3735036b17eaSPhilipp Reisner 		if (strcmp(integrity_alg, nc->integrity_alg)) {
37361ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
3737036b17eaSPhilipp Reisner 			goto disconnect_rcu_unlock;
3738036b17eaSPhilipp Reisner 		}
3739036b17eaSPhilipp Reisner 
374086db0618SAndreas Gruenbacher 		rcu_read_unlock();
3741fbc12f45SAndreas Gruenbacher 	}
37427d4c782cSAndreas Gruenbacher 
37437d4c782cSAndreas Gruenbacher 	if (integrity_alg[0]) {
37447d4c782cSAndreas Gruenbacher 		int hash_size;
37457d4c782cSAndreas Gruenbacher 
37467d4c782cSAndreas Gruenbacher 		/*
37477d4c782cSAndreas Gruenbacher 		 * We can only change the peer data integrity algorithm
37487d4c782cSAndreas Gruenbacher 		 * here.  Changing our own data integrity algorithm
37497d4c782cSAndreas Gruenbacher 		 * requires that we send a P_PROTOCOL_UPDATE packet at
37507d4c782cSAndreas Gruenbacher 		 * the same time; otherwise, the peer has no way to
37517d4c782cSAndreas Gruenbacher 		 * tell between which packets the algorithm should
37527d4c782cSAndreas Gruenbacher 		 * change.
37537d4c782cSAndreas Gruenbacher 		 */
37547d4c782cSAndreas Gruenbacher 
37553d234b33SEric Biggers 		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
37561b57e663SLars Ellenberg 		if (IS_ERR(peer_integrity_tfm)) {
37571b57e663SLars Ellenberg 			peer_integrity_tfm = NULL;
37581ec861ebSAndreas Gruenbacher 			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
37597d4c782cSAndreas Gruenbacher 				 integrity_alg);
3760b411b363SPhilipp Reisner 			goto disconnect;
3761b411b363SPhilipp Reisner 		}
3762b411b363SPhilipp Reisner 
37633d0e6375SKees Cook 		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
37647d4c782cSAndreas Gruenbacher 		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
37657d4c782cSAndreas Gruenbacher 		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
37667d4c782cSAndreas Gruenbacher 		if (!(int_dig_in && int_dig_vv)) {
37671ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
37687d4c782cSAndreas Gruenbacher 			goto disconnect;
37697d4c782cSAndreas Gruenbacher 		}
37707d4c782cSAndreas Gruenbacher 	}
37717d4c782cSAndreas Gruenbacher 
37727d4c782cSAndreas Gruenbacher 	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
37738404e191SZhen Lei 	if (!new_net_conf)
3774b411b363SPhilipp Reisner 		goto disconnect;
3775b411b363SPhilipp Reisner 
3776bde89a9eSAndreas Gruenbacher 	mutex_lock(&connection->data.mutex);
37770500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
3778bde89a9eSAndreas Gruenbacher 	old_net_conf = connection->net_conf;
37797d4c782cSAndreas Gruenbacher 	*new_net_conf = *old_net_conf;
3780b411b363SPhilipp Reisner 
37817d4c782cSAndreas Gruenbacher 	new_net_conf->wire_protocol = p_proto;
37827d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
37837d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
37847d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
37857d4c782cSAndreas Gruenbacher 	new_net_conf->two_primaries = p_two_primaries;
3786b411b363SPhilipp Reisner 
3787bde89a9eSAndreas Gruenbacher 	rcu_assign_pointer(connection->net_conf, new_net_conf);
37880500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3789bde89a9eSAndreas Gruenbacher 	mutex_unlock(&connection->data.mutex);
3790b411b363SPhilipp Reisner 
37913d0e6375SKees Cook 	crypto_free_shash(connection->peer_integrity_tfm);
3792bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_in);
3793bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_vv);
3794bde89a9eSAndreas Gruenbacher 	connection->peer_integrity_tfm = peer_integrity_tfm;
3795bde89a9eSAndreas Gruenbacher 	connection->int_dig_in = int_dig_in;
3796bde89a9eSAndreas Gruenbacher 	connection->int_dig_vv = int_dig_vv;
3797b411b363SPhilipp Reisner 
37987d4c782cSAndreas Gruenbacher 	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
37991ec861ebSAndreas Gruenbacher 		drbd_info(connection, "peer data-integrity-alg: %s\n",
38007d4c782cSAndreas Gruenbacher 			  integrity_alg[0] ? integrity_alg : "(none)");
3801b411b363SPhilipp Reisner 
38027d4c782cSAndreas Gruenbacher 	synchronize_rcu();
38037d4c782cSAndreas Gruenbacher 	kfree(old_net_conf);
380482bc0194SAndreas Gruenbacher 	return 0;
3805b411b363SPhilipp Reisner 
380644ed167dSPhilipp Reisner disconnect_rcu_unlock:
380744ed167dSPhilipp Reisner 	rcu_read_unlock();
3808b411b363SPhilipp Reisner disconnect:
38093d0e6375SKees Cook 	crypto_free_shash(peer_integrity_tfm);
3810036b17eaSPhilipp Reisner 	kfree(int_dig_in);
3811036b17eaSPhilipp Reisner 	kfree(int_dig_vv);
3812bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
381382bc0194SAndreas Gruenbacher 	return -EIO;
3814b411b363SPhilipp Reisner }
3815b411b363SPhilipp Reisner 
3816b411b363SPhilipp Reisner /* helper function
3817b411b363SPhilipp Reisner  * input: alg name, feature name
3818b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3819b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3820b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
38213d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
38223d0e6375SKees Cook 		const struct drbd_device *device,
3823b411b363SPhilipp Reisner 		const char *alg, const char *name)
3824b411b363SPhilipp Reisner {
38253d0e6375SKees Cook 	struct crypto_shash *tfm;
3826b411b363SPhilipp Reisner 
3827b411b363SPhilipp Reisner 	if (!alg[0])
3828b411b363SPhilipp Reisner 		return NULL;
3829b411b363SPhilipp Reisner 
38303d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3831b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3832d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3833b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3834b411b363SPhilipp Reisner 		return tfm;
3835b411b363SPhilipp Reisner 	}
3836b411b363SPhilipp Reisner 	return tfm;
3837b411b363SPhilipp Reisner }
3838b411b363SPhilipp Reisner 
3839bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3840b411b363SPhilipp Reisner {
3841bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
38424a76b161SAndreas Gruenbacher 	int size = pi->size;
38434a76b161SAndreas Gruenbacher 
38444a76b161SAndreas Gruenbacher 	while (size) {
38454a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3846bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38474a76b161SAndreas Gruenbacher 		if (s <= 0) {
38484a76b161SAndreas Gruenbacher 			if (s < 0)
38494a76b161SAndreas Gruenbacher 				return s;
38504a76b161SAndreas Gruenbacher 			break;
38514a76b161SAndreas Gruenbacher 		}
38524a76b161SAndreas Gruenbacher 		size -= s;
38534a76b161SAndreas Gruenbacher 	}
38544a76b161SAndreas Gruenbacher 	if (size)
38554a76b161SAndreas Gruenbacher 		return -EIO;
38564a76b161SAndreas Gruenbacher 	return 0;
38574a76b161SAndreas Gruenbacher }
38584a76b161SAndreas Gruenbacher 
38594a76b161SAndreas Gruenbacher /*
38604a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
38614a76b161SAndreas Gruenbacher  *
38624a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
38634a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
38644a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
38654a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
38664a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
38674a76b161SAndreas Gruenbacher  *
38684a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
38694a76b161SAndreas Gruenbacher  */
3870bde89a9eSAndreas Gruenbacher static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
38714a76b161SAndreas Gruenbacher {
38721ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
38732fcb8f30SAndreas Gruenbacher 		  cmdname(pi->cmd), pi->vnr);
3874bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
38754a76b161SAndreas Gruenbacher }
38764a76b161SAndreas Gruenbacher 
3877bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38784a76b161SAndreas Gruenbacher {
38799f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3880b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3881e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3882b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38833d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38843d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38852ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3886813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3887bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3888813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38896a365874SStephen Kitt 	unsigned int fifo_size = 0;
389082bc0194SAndreas Gruenbacher 	int err;
3891b411b363SPhilipp Reisner 
38929f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38939f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3894bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38959f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3896b411b363SPhilipp Reisner 
3897b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3898b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3899b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
39008e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
39018e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3902b411b363SPhilipp Reisner 
3903e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3904d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3905e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
390682bc0194SAndreas Gruenbacher 		return -EIO;
3907b411b363SPhilipp Reisner 	}
3908b411b363SPhilipp Reisner 
3909b411b363SPhilipp Reisner 	if (apv <= 88) {
3910e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3911e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39128e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3913e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3914e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39150b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
39168e26f9ccSPhilipp Reisner 	} else {
3917e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3918e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39190b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3920b411b363SPhilipp Reisner 	}
3921b411b363SPhilipp Reisner 
3922b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3923e658983aSAndreas Gruenbacher 	p = pi->data;
3924*52a0cab3SKees Cook 	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
3925*52a0cab3SKees Cook 	memset(&p->algs, 0, sizeof(p->algs));
3926b411b363SPhilipp Reisner 
39279f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
392882bc0194SAndreas Gruenbacher 	if (err)
392982bc0194SAndreas Gruenbacher 		return err;
3930b411b363SPhilipp Reisner 
39310500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
39329f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3933b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3934daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3935daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3936b30ab791SAndreas Gruenbacher 			put_ldev(device);
39370500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3938d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3939daeda1ccSPhilipp Reisner 			return -ENOMEM;
3940f399002eSLars Ellenberg 		}
3941b411b363SPhilipp Reisner 
3942b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3943daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3944daeda1ccSPhilipp Reisner 
39456394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3946813472ceSPhilipp Reisner 	}
3947b411b363SPhilipp Reisner 
3948b411b363SPhilipp Reisner 	if (apv >= 88) {
3949b411b363SPhilipp Reisner 		if (apv == 88) {
39505de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3951d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39525de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3953b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3954813472ceSPhilipp Reisner 				err = -EIO;
3955813472ceSPhilipp Reisner 				goto reconnect;
3956b411b363SPhilipp Reisner 			}
3957b411b363SPhilipp Reisner 
39589f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3959813472ceSPhilipp Reisner 			if (err)
3960813472ceSPhilipp Reisner 				goto reconnect;
3961b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3962b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39630b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3964b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3965b411b363SPhilipp Reisner 
3966b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3967b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3968b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39690b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39700b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3971b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3972b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3973b411b363SPhilipp Reisner 		}
3974b411b363SPhilipp Reisner 
39752ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3976b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3977d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39782ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3979b411b363SPhilipp Reisner 				goto disconnect;
3980b411b363SPhilipp Reisner 			}
3981b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3982b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3983b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3984b411b363SPhilipp Reisner 				verify_tfm = NULL;
3985b411b363SPhilipp Reisner 				goto disconnect;
3986b411b363SPhilipp Reisner 			}
3987b411b363SPhilipp Reisner 		}
3988b411b363SPhilipp Reisner 
39892ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3990b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3991d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39922ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3993b411b363SPhilipp Reisner 				goto disconnect;
3994b411b363SPhilipp Reisner 			}
3995b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3996b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3997b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3998b411b363SPhilipp Reisner 				csums_tfm = NULL;
3999b411b363SPhilipp Reisner 				goto disconnect;
4000b411b363SPhilipp Reisner 			}
4001b411b363SPhilipp Reisner 		}
4002b411b363SPhilipp Reisner 
4003813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
4004daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
4005daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
4006daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
4007daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
4008778f271dSPhilipp Reisner 
4009daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
4010b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
4011813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
4012813472ceSPhilipp Reisner 				if (!new_plan) {
4013d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
4014b30ab791SAndreas Gruenbacher 					put_ldev(device);
4015778f271dSPhilipp Reisner 					goto disconnect;
4016778f271dSPhilipp Reisner 				}
4017778f271dSPhilipp Reisner 			}
40188e26f9ccSPhilipp Reisner 		}
4019b411b363SPhilipp Reisner 
402091fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
40212ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
40228404e191SZhen Lei 			if (!new_net_conf)
402391fd4dadSPhilipp Reisner 				goto disconnect;
402491fd4dadSPhilipp Reisner 
40252ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
402691fd4dadSPhilipp Reisner 
4027b411b363SPhilipp Reisner 			if (verify_tfm) {
40282ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
40292ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
40303d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
40319f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
4032d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
4033b411b363SPhilipp Reisner 			}
4034b411b363SPhilipp Reisner 			if (csums_tfm) {
40352ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
40362ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
40373d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
40389f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
4039d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4040b411b363SPhilipp Reisner 			}
4041bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
4042778f271dSPhilipp Reisner 		}
4043b411b363SPhilipp Reisner 	}
4044b411b363SPhilipp Reisner 
4045813472ceSPhilipp Reisner 	if (new_disk_conf) {
4046b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4047b30ab791SAndreas Gruenbacher 		put_ldev(device);
4048b411b363SPhilipp Reisner 	}
4049813472ceSPhilipp Reisner 
4050813472ceSPhilipp Reisner 	if (new_plan) {
4051b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4052b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4053813472ceSPhilipp Reisner 	}
4054daeda1ccSPhilipp Reisner 
40550500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4056daeda1ccSPhilipp Reisner 	synchronize_rcu();
4057daeda1ccSPhilipp Reisner 	if (new_net_conf)
4058daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4059daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4060813472ceSPhilipp Reisner 	kfree(old_plan);
4061daeda1ccSPhilipp Reisner 
406282bc0194SAndreas Gruenbacher 	return 0;
4063b411b363SPhilipp Reisner 
4064813472ceSPhilipp Reisner reconnect:
4065813472ceSPhilipp Reisner 	if (new_disk_conf) {
4066b30ab791SAndreas Gruenbacher 		put_ldev(device);
4067813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4068813472ceSPhilipp Reisner 	}
40690500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4070813472ceSPhilipp Reisner 	return -EIO;
4071813472ceSPhilipp Reisner 
4072b411b363SPhilipp Reisner disconnect:
4073813472ceSPhilipp Reisner 	kfree(new_plan);
4074813472ceSPhilipp Reisner 	if (new_disk_conf) {
4075b30ab791SAndreas Gruenbacher 		put_ldev(device);
4076813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4077813472ceSPhilipp Reisner 	}
40780500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4079b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4080b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40813d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4082b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40833d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40849f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
408582bc0194SAndreas Gruenbacher 	return -EIO;
4086b411b363SPhilipp Reisner }
4087b411b363SPhilipp Reisner 
4088b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4089b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4090b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4091b411b363SPhilipp Reisner {
4092b411b363SPhilipp Reisner 	sector_t d;
4093b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4094b411b363SPhilipp Reisner 		return;
4095b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4096b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4097d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4098b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4099b411b363SPhilipp Reisner }
4100b411b363SPhilipp Reisner 
/*
 * receive_sizes() - process a P_SIZES packet from the peer
 *
 * Records the peer's backing-device size, negotiates/updates our own
 * device size (possibly rewriting disk_conf->disk_size via RCU), and
 * answers with our sizes or triggers a resync when something grew.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EIO after
 * requesting C_DISCONNECTING on an unacceptable size combination.
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* qlim is only valid on the wire if the WSAME feature was negotiated */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = get_capacity(device->vdisk);

	/* peer's backing size, user-requested size, and current (exposed) size */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			/* The negotiated user size differs from ours: publish
			 * a new disk_conf via RCU and free the old one after
			 * a grace period. */
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		/* ordering is significant — see comment above */
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore, peer does not know nothing. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* remember the backing device's capacity to detect local resizes */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
		    ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4272b411b363SPhilipp Reisner 
4273bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4274b411b363SPhilipp Reisner {
42759f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4276b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4277e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4278b411b363SPhilipp Reisner 	u64 *p_uuid;
427962b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4280b411b363SPhilipp Reisner 
42819f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42829f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4283bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42849f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42854a76b161SAndreas Gruenbacher 
4286365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42878404e191SZhen Lei 	if (!p_uuid)
4288063eacf8SJing Wang 		return false;
4289b411b363SPhilipp Reisner 
4290b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4291b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4292b411b363SPhilipp Reisner 
4293b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4294b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4295b411b363SPhilipp Reisner 
4296b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4297b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4298b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4299b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4300d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4301b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
43029f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
430382bc0194SAndreas Gruenbacher 		return -EIO;
4304b411b363SPhilipp Reisner 	}
4305b411b363SPhilipp Reisner 
4306b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4307b411b363SPhilipp Reisner 		int skip_initial_sync =
4308b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
43099f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4310b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4311b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4312b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4313d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4314b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
431520ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
431620ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4317b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4318b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4319b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4320b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4321b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
432262b0da3aSLars Ellenberg 			updated_uuids = 1;
4323b411b363SPhilipp Reisner 		}
4324b30ab791SAndreas Gruenbacher 		put_ldev(device);
4325b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4326b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
432718a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
432818a50fa2SPhilipp Reisner 		   for me. */
4329b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4330b411b363SPhilipp Reisner 	}
4331b411b363SPhilipp Reisner 
4332b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4333b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4334b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4335b411b363SPhilipp Reisner 	   new disk state... */
4336b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4337b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4338b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4339b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
434062b0da3aSLars Ellenberg 
434162b0da3aSLars Ellenberg 	if (updated_uuids)
4342b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4343b411b363SPhilipp Reisner 
434482bc0194SAndreas Gruenbacher 	return 0;
4345b411b363SPhilipp Reisner }
4346b411b363SPhilipp Reisner 
4347b411b363SPhilipp Reisner /**
4348b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4349b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4350b411b363SPhilipp Reisner  */
4351b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4352b411b363SPhilipp Reisner {
4353b411b363SPhilipp Reisner 	union drbd_state ms;
4354b411b363SPhilipp Reisner 
4355b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4356369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4357b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4358b411b363SPhilipp Reisner 
4359b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4360b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4361b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4362b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4363b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4364b411b363SPhilipp Reisner 	};
4365b411b363SPhilipp Reisner 
4366b411b363SPhilipp Reisner 	ms.i = ps.i;
4367b411b363SPhilipp Reisner 
4368b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4369b411b363SPhilipp Reisner 	ms.peer = ps.role;
4370b411b363SPhilipp Reisner 	ms.role = ps.peer;
4371b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4372b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4373b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4374b411b363SPhilipp Reisner 
4375b411b363SPhilipp Reisner 	return ms;
4376b411b363SPhilipp Reisner }
4377b411b363SPhilipp Reisner 
4378bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4379b411b363SPhilipp Reisner {
43809f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4381b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4382e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4383b411b363SPhilipp Reisner 	union drbd_state mask, val;
4384bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4385b411b363SPhilipp Reisner 
43869f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43879f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43884a76b161SAndreas Gruenbacher 		return -EIO;
43899f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43904a76b161SAndreas Gruenbacher 
4391b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4392b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4393b411b363SPhilipp Reisner 
43949f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4395b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
439669a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
439782bc0194SAndreas Gruenbacher 		return 0;
4398b411b363SPhilipp Reisner 	}
4399b411b363SPhilipp Reisner 
4400b411b363SPhilipp Reisner 	mask = convert_state(mask);
4401b411b363SPhilipp Reisner 	val = convert_state(val);
4402b411b363SPhilipp Reisner 
4403b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
440469a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4405047cd4a6SPhilipp Reisner 
4406b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4407b411b363SPhilipp Reisner 
440882bc0194SAndreas Gruenbacher 	return 0;
4409b411b363SPhilipp Reisner }
4410b411b363SPhilipp Reisner 
4411bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4412b411b363SPhilipp Reisner {
4413e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4414dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4415dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4416dfafcc8aSPhilipp Reisner 
4417dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4418dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4419dfafcc8aSPhilipp Reisner 
4420bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4421bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4422bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
442382bc0194SAndreas Gruenbacher 		return 0;
4424dfafcc8aSPhilipp Reisner 	}
4425dfafcc8aSPhilipp Reisner 
4426dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4427dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4428dfafcc8aSPhilipp Reisner 
4429bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4430bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4431dfafcc8aSPhilipp Reisner 
443282bc0194SAndreas Gruenbacher 	return 0;
4433dfafcc8aSPhilipp Reisner }
4434dfafcc8aSPhilipp Reisner 
/* Handle a P_STATE packet: the peer reports its current state.  Merge the
 * peer's view into our own state (possibly starting a resync handshake),
 * or disconnect if the states are incompatible. */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer reports D_NEGOTIATING, derive its effective disk
	 * state from the UI_FLAGS it sent us earlier (bit 2 set means
	 * inconsistent). */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
	/* We come back here if the state changed under us while we were
	 * working on ns outside the lock (see the os.i check below). */
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* Peer throttled itself to "Ahead"; we become "Behind". */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids()).
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake() signals "no usable
		 * common sync decision"; sort out why and react. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Re-check under the lock that the state did not change while we
	 * were computing ns; if it did, start over. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	/* Only the connect transition itself may be a "soft" change; any
	 * other transition here is forced (CS_HARD). */
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4631b411b363SPhilipp Reisner 
4632bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4633b411b363SPhilipp Reisner {
46349f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4635b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4636e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
46374a76b161SAndreas Gruenbacher 
46389f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
46399f4fe9adSAndreas Gruenbacher 	if (!peer_device)
46404a76b161SAndreas Gruenbacher 		return -EIO;
46419f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4642b411b363SPhilipp Reisner 
4643b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4644b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4645b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4646b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4647b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4648b411b363SPhilipp Reisner 
46490b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4650b411b363SPhilipp Reisner 
4651b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4652b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4653b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4654b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4655b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4656b411b363SPhilipp Reisner 
4657b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4658b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4659b411b363SPhilipp Reisner 
4660b30ab791SAndreas Gruenbacher 		put_ldev(device);
4661b411b363SPhilipp Reisner 	} else
4662d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4663b411b363SPhilipp Reisner 
466482bc0194SAndreas Gruenbacher 	return 0;
4665b411b363SPhilipp Reisner }
4666b411b363SPhilipp Reisner 
46679b48ff07SLee Jones /*
46682c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46692c46407dSAndreas Gruenbacher  *
46702c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46712c46407dSAndreas Gruenbacher  * code upon failure.
46722c46407dSAndreas Gruenbacher  */
46732c46407dSAndreas Gruenbacher static int
467469a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4675e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4676b411b363SPhilipp Reisner {
467750d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
467869a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4679e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
468050d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4681e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46822c46407dSAndreas Gruenbacher 	int err;
4683b411b363SPhilipp Reisner 
468450d0b1adSAndreas Gruenbacher 	if (want != size) {
468569a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46862c46407dSAndreas Gruenbacher 		return -EIO;
4687b411b363SPhilipp Reisner 	}
4688b411b363SPhilipp Reisner 	if (want == 0)
46892c46407dSAndreas Gruenbacher 		return 0;
469069a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
469182bc0194SAndreas Gruenbacher 	if (err)
46922c46407dSAndreas Gruenbacher 		return err;
4693b411b363SPhilipp Reisner 
469469a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4695b411b363SPhilipp Reisner 
4696b411b363SPhilipp Reisner 	c->word_offset += num_words;
4697b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4698b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4699b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4700b411b363SPhilipp Reisner 
47012c46407dSAndreas Gruenbacher 	return 1;
4702b411b363SPhilipp Reisner }
4703b411b363SPhilipp Reisner 
4704a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4705a02d1240SAndreas Gruenbacher {
4706a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4707a02d1240SAndreas Gruenbacher }
4708a02d1240SAndreas Gruenbacher 
4709a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4710a02d1240SAndreas Gruenbacher {
4711a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4712a02d1240SAndreas Gruenbacher }
4713a02d1240SAndreas Gruenbacher 
4714a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4715a02d1240SAndreas Gruenbacher {
4716a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4717a02d1240SAndreas Gruenbacher }
4718a02d1240SAndreas Gruenbacher 
/*
 * recv_bm_rle_bits - decode a VLI/RLE compressed bitmap chunk
 *
 * Decodes run lengths from the packet payload and sets the corresponding
 * bit runs in our bitmap; runs alternate between "clear" and "set",
 * starting with the value encoded by dcbp_get_start().
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* up to 64 not-yet-decoded bits from the stream */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of the current run */
	unsigned long e;			/* end bit of the current run */
	int toggle = dcbp_get_start(p);
	int have;	/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead buffer */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		/* decode one variable-length-coded run length */
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		/* only "set" runs modify the bitmap; "clear" runs are skipped */
		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoded code must fit in what we had buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead buffer back up to 64 bits */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* another iteration is needed unless we reached the last bit */
	return (s != c->bm_bits);
}
4787b411b363SPhilipp Reisner 
47889b48ff07SLee Jones /*
47892c46407dSAndreas Gruenbacher  * decode_bitmap_c
47902c46407dSAndreas Gruenbacher  *
47912c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47922c46407dSAndreas Gruenbacher  * code upon failure.
47932c46407dSAndreas Gruenbacher  */
47942c46407dSAndreas Gruenbacher static int
479569a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4796b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4797c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4798c6d25cfeSPhilipp Reisner 		unsigned int len)
4799b411b363SPhilipp Reisner {
4800a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
480169a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4802b411b363SPhilipp Reisner 
4803b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4804b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4805b411b363SPhilipp Reisner 	 * during all our tests. */
4806b411b363SPhilipp Reisner 
480769a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
480869a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
48092c46407dSAndreas Gruenbacher 	return -EIO;
4810b411b363SPhilipp Reisner }
4811b411b363SPhilipp Reisner 
4812b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4813b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4814b411b363SPhilipp Reisner {
4815b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4816a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
481750d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
481850d0b1adSAndreas Gruenbacher 	unsigned int plain =
481950d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
482050d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
482150d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
482250d0b1adSAndreas Gruenbacher 	unsigned int r;
4823b411b363SPhilipp Reisner 
4824b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4825b411b363SPhilipp Reisner 	if (total == 0)
4826b411b363SPhilipp Reisner 		return;
4827b411b363SPhilipp Reisner 
4828b411b363SPhilipp Reisner 	/* don't report if not compressed */
4829b411b363SPhilipp Reisner 	if (total >= plain)
4830b411b363SPhilipp Reisner 		return;
4831b411b363SPhilipp Reisner 
4832b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4833b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4834b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4835b411b363SPhilipp Reisner 
4836b411b363SPhilipp Reisner 	if (r > 1000)
4837b411b363SPhilipp Reisner 		r = 1000;
4838b411b363SPhilipp Reisner 
4839b411b363SPhilipp Reisner 	r = 1000 - r;
4840d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4841b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4842b411b363SPhilipp Reisner 			direction,
4843b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4844b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4845b411b363SPhilipp Reisner 			total, r/10, r % 10);
4846b411b363SPhilipp Reisner }
4847b411b363SPhilipp Reisner 
4848b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4849b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4850b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4851b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4852b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4853b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4854b411b363SPhilipp Reisner 
4855b411b363SPhilipp Reisner    returns 0 on failure, 1 if we successfully received it. */
4856bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4857b411b363SPhilipp Reisner {
48589f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4859b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4860b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
48612c46407dSAndreas Gruenbacher 	int err;
48624a76b161SAndreas Gruenbacher 
48639f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
48649f4fe9adSAndreas Gruenbacher 	if (!peer_device)
48654a76b161SAndreas Gruenbacher 		return -EIO;
48669f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4867b411b363SPhilipp Reisner 
4868b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
486920ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
487020ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4871b411b363SPhilipp Reisner 
4872b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4873b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4874b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4875b411b363SPhilipp Reisner 	};
4876b411b363SPhilipp Reisner 
48772c46407dSAndreas Gruenbacher 	for(;;) {
4878e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
487969a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4880e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4881b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4882b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4883e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4884b411b363SPhilipp Reisner 
4885bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4886d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
488782bc0194SAndreas Gruenbacher 				err = -EIO;
4888b411b363SPhilipp Reisner 				goto out;
4889b411b363SPhilipp Reisner 			}
4890e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4891d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
489282bc0194SAndreas Gruenbacher 				err = -EIO;
489378fcbdaeSAndreas Gruenbacher 				goto out;
4894b411b363SPhilipp Reisner 			}
48959f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4896e658983aSAndreas Gruenbacher 			if (err)
4897e658983aSAndreas Gruenbacher 			       goto out;
489869a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4899b411b363SPhilipp Reisner 		} else {
4900d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
490182bc0194SAndreas Gruenbacher 			err = -EIO;
4902b411b363SPhilipp Reisner 			goto out;
4903b411b363SPhilipp Reisner 		}
4904b411b363SPhilipp Reisner 
4905e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4906bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4907b411b363SPhilipp Reisner 
49082c46407dSAndreas Gruenbacher 		if (err <= 0) {
49092c46407dSAndreas Gruenbacher 			if (err < 0)
49102c46407dSAndreas Gruenbacher 				goto out;
4911b411b363SPhilipp Reisner 			break;
49122c46407dSAndreas Gruenbacher 		}
49139f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
491482bc0194SAndreas Gruenbacher 		if (err)
4915b411b363SPhilipp Reisner 			goto out;
49162c46407dSAndreas Gruenbacher 	}
4917b411b363SPhilipp Reisner 
4918b30ab791SAndreas Gruenbacher 	INFO_bm_xfer_stats(device, "receive", &c);
4919b411b363SPhilipp Reisner 
4920b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4921de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4922de1f8e4aSAndreas Gruenbacher 
4923b30ab791SAndreas Gruenbacher 		err = drbd_send_bitmap(device);
492482bc0194SAndreas Gruenbacher 		if (err)
4925b411b363SPhilipp Reisner 			goto out;
4926b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4927b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
49280b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4929b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4930b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4931b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4932d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4933b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4934b411b363SPhilipp Reisner 	}
493582bc0194SAndreas Gruenbacher 	err = 0;
4936b411b363SPhilipp Reisner 
4937b411b363SPhilipp Reisner  out:
4938b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4939b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4940b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
494182bc0194SAndreas Gruenbacher 	return err;
4942b411b363SPhilipp Reisner }
4943b411b363SPhilipp Reisner 
4944bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4945b411b363SPhilipp Reisner {
49461ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4947e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4948b411b363SPhilipp Reisner 
4949bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4950b411b363SPhilipp Reisner }
4951b411b363SPhilipp Reisner 
4952bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4953b411b363SPhilipp Reisner {
4954b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4955b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4956ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
495782bc0194SAndreas Gruenbacher 	return 0;
4958b411b363SPhilipp Reisner }
4959b411b363SPhilipp Reisner 
4960bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
496173a01a18SPhilipp Reisner {
49629f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4963b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4964e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49654a76b161SAndreas Gruenbacher 
49669f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49679f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49684a76b161SAndreas Gruenbacher 		return -EIO;
49699f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
497073a01a18SPhilipp Reisner 
4971b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4972f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4973f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4974f735e363SLars Ellenberg 	case C_BEHIND:
4975f735e363SLars Ellenberg 			break;
4976f735e363SLars Ellenberg 	default:
4977d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4978b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4979f735e363SLars Ellenberg 	}
4980f735e363SLars Ellenberg 
4981b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
498273a01a18SPhilipp Reisner 
498382bc0194SAndreas Gruenbacher 	return 0;
498473a01a18SPhilipp Reisner }
498573a01a18SPhilipp Reisner 
4986700ca8c0SPhilipp Reisner static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
4987700ca8c0SPhilipp Reisner {
4988700ca8c0SPhilipp Reisner 	struct drbd_peer_device *peer_device;
4989700ca8c0SPhilipp Reisner 	struct p_block_desc *p = pi->data;
4990700ca8c0SPhilipp Reisner 	struct drbd_device *device;
4991700ca8c0SPhilipp Reisner 	sector_t sector;
4992700ca8c0SPhilipp Reisner 	int size, err = 0;
4993700ca8c0SPhilipp Reisner 
4994700ca8c0SPhilipp Reisner 	peer_device = conn_peer_device(connection, pi->vnr);
4995700ca8c0SPhilipp Reisner 	if (!peer_device)
4996700ca8c0SPhilipp Reisner 		return -EIO;
4997700ca8c0SPhilipp Reisner 	device = peer_device->device;
4998700ca8c0SPhilipp Reisner 
4999700ca8c0SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5000700ca8c0SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5001700ca8c0SPhilipp Reisner 
5002700ca8c0SPhilipp Reisner 	dec_rs_pending(device);
5003700ca8c0SPhilipp Reisner 
5004700ca8c0SPhilipp Reisner 	if (get_ldev(device)) {
5005700ca8c0SPhilipp Reisner 		struct drbd_peer_request *peer_req;
500645c21793SChristoph Hellwig 		const int op = REQ_OP_WRITE_ZEROES;
5007700ca8c0SPhilipp Reisner 
5008700ca8c0SPhilipp Reisner 		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
50099104d31aSLars Ellenberg 					       size, 0, GFP_NOIO);
5010700ca8c0SPhilipp Reisner 		if (!peer_req) {
5011700ca8c0SPhilipp Reisner 			put_ldev(device);
5012700ca8c0SPhilipp Reisner 			return -ENOMEM;
5013700ca8c0SPhilipp Reisner 		}
5014700ca8c0SPhilipp Reisner 
5015700ca8c0SPhilipp Reisner 		peer_req->w.cb = e_end_resync_block;
5016700ca8c0SPhilipp Reisner 		peer_req->submit_jif = jiffies;
5017f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
5018700ca8c0SPhilipp Reisner 
5019700ca8c0SPhilipp Reisner 		spin_lock_irq(&device->resource->req_lock);
5020700ca8c0SPhilipp Reisner 		list_add_tail(&peer_req->w.list, &device->sync_ee);
5021700ca8c0SPhilipp Reisner 		spin_unlock_irq(&device->resource->req_lock);
5022700ca8c0SPhilipp Reisner 
5023700ca8c0SPhilipp Reisner 		atomic_add(pi->size >> 9, &device->rs_sect_ev);
5024700ca8c0SPhilipp Reisner 		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
5025700ca8c0SPhilipp Reisner 
5026700ca8c0SPhilipp Reisner 		if (err) {
5027700ca8c0SPhilipp Reisner 			spin_lock_irq(&device->resource->req_lock);
5028700ca8c0SPhilipp Reisner 			list_del(&peer_req->w.list);
5029700ca8c0SPhilipp Reisner 			spin_unlock_irq(&device->resource->req_lock);
5030700ca8c0SPhilipp Reisner 
5031700ca8c0SPhilipp Reisner 			drbd_free_peer_req(device, peer_req);
5032700ca8c0SPhilipp Reisner 			put_ldev(device);
5033700ca8c0SPhilipp Reisner 			err = 0;
5034700ca8c0SPhilipp Reisner 			goto fail;
5035700ca8c0SPhilipp Reisner 		}
5036700ca8c0SPhilipp Reisner 
5037700ca8c0SPhilipp Reisner 		inc_unacked(device);
5038700ca8c0SPhilipp Reisner 
5039700ca8c0SPhilipp Reisner 		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
5040700ca8c0SPhilipp Reisner 		   as well as drbd_rs_complete_io() */
5041700ca8c0SPhilipp Reisner 	} else {
5042700ca8c0SPhilipp Reisner 	fail:
5043700ca8c0SPhilipp Reisner 		drbd_rs_complete_io(device, sector);
5044700ca8c0SPhilipp Reisner 		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
5045700ca8c0SPhilipp Reisner 	}
5046700ca8c0SPhilipp Reisner 
5047700ca8c0SPhilipp Reisner 	atomic_add(size >> 9, &device->rs_sect_in);
5048700ca8c0SPhilipp Reisner 
5049700ca8c0SPhilipp Reisner 	return err;
5050700ca8c0SPhilipp Reisner }
5051700ca8c0SPhilipp Reisner 
505202918be2SPhilipp Reisner struct data_cmd {
505302918be2SPhilipp Reisner 	int expect_payload;
50549104d31aSLars Ellenberg 	unsigned int pkt_size;
5055bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *, struct packet_info *);
5056b411b363SPhilipp Reisner };
5057b411b363SPhilipp Reisner 
505802918be2SPhilipp Reisner static struct data_cmd drbd_cmd_handler[] = {
505902918be2SPhilipp Reisner 	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
506002918be2SPhilipp Reisner 	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
506102918be2SPhilipp Reisner 	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
506202918be2SPhilipp Reisner 	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
5063e658983aSAndreas Gruenbacher 	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
5064e658983aSAndreas Gruenbacher 	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
5065e658983aSAndreas Gruenbacher 	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
506602918be2SPhilipp Reisner 	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
506702918be2SPhilipp Reisner 	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
5068e658983aSAndreas Gruenbacher 	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
5069e658983aSAndreas Gruenbacher 	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
507002918be2SPhilipp Reisner 	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
507102918be2SPhilipp Reisner 	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
507202918be2SPhilipp Reisner 	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
507302918be2SPhilipp Reisner 	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
507402918be2SPhilipp Reisner 	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
507502918be2SPhilipp Reisner 	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
507602918be2SPhilipp Reisner 	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
507702918be2SPhilipp Reisner 	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
507802918be2SPhilipp Reisner 	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
5079700ca8c0SPhilipp Reisner 	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
508002918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
508173a01a18SPhilipp Reisner 	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
50824a76b161SAndreas Gruenbacher 	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
5083036b17eaSPhilipp Reisner 	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
5084a0fb3c47SLars Ellenberg 	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
5085f31e583aSLars Ellenberg 	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
5086700ca8c0SPhilipp Reisner 	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
50879104d31aSLars Ellenberg 	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
508802918be2SPhilipp Reisner };
508902918be2SPhilipp Reisner 
5090bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5091b411b363SPhilipp Reisner {
509277351055SPhilipp Reisner 	struct packet_info pi;
509302918be2SPhilipp Reisner 	size_t shs; /* sub header size */
509482bc0194SAndreas Gruenbacher 	int err;
5095b411b363SPhilipp Reisner 
5096bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
50979104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5098deebe195SAndreas Gruenbacher 
5099bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5100c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5101c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
510202918be2SPhilipp Reisner 			goto err_out;
510302918be2SPhilipp Reisner 
5104deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
51054a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
51061ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
51072fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
510802918be2SPhilipp Reisner 			goto err_out;
51090b33a916SLars Ellenberg 		}
5110b411b363SPhilipp Reisner 
5111e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
51129104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
51139104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5114e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
51151ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
51162fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5117c13f7e1aSLars Ellenberg 			goto err_out;
5118c13f7e1aSLars Ellenberg 		}
51199104d31aSLars Ellenberg 		if (pi.size < shs) {
51209104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
51219104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
51229104d31aSLars Ellenberg 			goto err_out;
51239104d31aSLars Ellenberg 		}
5124c13f7e1aSLars Ellenberg 
5125c13f7e1aSLars Ellenberg 		if (shs) {
5126944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5127bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5128a5c31904SAndreas Gruenbacher 			if (err)
512902918be2SPhilipp Reisner 				goto err_out;
5130e2857216SAndreas Gruenbacher 			pi.size -= shs;
5131b411b363SPhilipp Reisner 		}
513202918be2SPhilipp Reisner 
5133944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5134bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
51354a76b161SAndreas Gruenbacher 		if (err) {
51361ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
51379f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
513802918be2SPhilipp Reisner 			goto err_out;
513902918be2SPhilipp Reisner 		}
514002918be2SPhilipp Reisner 	}
514182bc0194SAndreas Gruenbacher 	return;
514202918be2SPhilipp Reisner 
514302918be2SPhilipp Reisner     err_out:
5144bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5145b411b363SPhilipp Reisner }
5146b411b363SPhilipp Reisner 
5147bde89a9eSAndreas Gruenbacher static void conn_disconnect(struct drbd_connection *connection)
5148f70b3511SPhilipp Reisner {
5149c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5150bbeb641cSPhilipp Reisner 	enum drbd_conns oc;
5151376694a0SPhilipp Reisner 	int vnr;
5152f70b3511SPhilipp Reisner 
5153bde89a9eSAndreas Gruenbacher 	if (connection->cstate == C_STANDALONE)
5154b411b363SPhilipp Reisner 		return;
5155b411b363SPhilipp Reisner 
5156545752d5SLars Ellenberg 	/* We are about to start the cleanup after connection loss.
5157545752d5SLars Ellenberg 	 * Make sure drbd_make_request knows about that.
5158545752d5SLars Ellenberg 	 * Usually we should be in some network failure state already,
5159545752d5SLars Ellenberg 	 * but just in case we are not, we fix it up here.
5160545752d5SLars Ellenberg 	 */
5161bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5162545752d5SLars Ellenberg 
5163668700b4SPhilipp Reisner 	/* ack_receiver does not clean up anything. it must not interfere, either */
51641c03e520SPhilipp Reisner 	drbd_thread_stop(&connection->ack_receiver);
5165668700b4SPhilipp Reisner 	if (connection->ack_sender) {
5166668700b4SPhilipp Reisner 		destroy_workqueue(connection->ack_sender);
5167668700b4SPhilipp Reisner 		connection->ack_sender = NULL;
5168668700b4SPhilipp Reisner 	}
5169bde89a9eSAndreas Gruenbacher 	drbd_free_sock(connection);
5170360cc740SPhilipp Reisner 
5171c141ebdaSPhilipp Reisner 	rcu_read_lock();
5172c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5173c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5174b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
5175c141ebdaSPhilipp Reisner 		rcu_read_unlock();
517669a22773SAndreas Gruenbacher 		drbd_disconnected(peer_device);
5177c06ece6bSAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
5178c141ebdaSPhilipp Reisner 		rcu_read_lock();
5179c141ebdaSPhilipp Reisner 	}
5180c141ebdaSPhilipp Reisner 	rcu_read_unlock();
5181c141ebdaSPhilipp Reisner 
5182bde89a9eSAndreas Gruenbacher 	if (!list_empty(&connection->current_epoch->list))
51831ec861ebSAndreas Gruenbacher 		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
518412038a3aSPhilipp Reisner 	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
5185bde89a9eSAndreas Gruenbacher 	atomic_set(&connection->current_epoch->epoch_size, 0);
5186bde89a9eSAndreas Gruenbacher 	connection->send.seen_any_write_yet = false;
518712038a3aSPhilipp Reisner 
51881ec861ebSAndreas Gruenbacher 	drbd_info(connection, "Connection closed\n");
5189360cc740SPhilipp Reisner 
5190bde89a9eSAndreas Gruenbacher 	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
5191bde89a9eSAndreas Gruenbacher 		conn_try_outdate_peer_async(connection);
5192cb703454SPhilipp Reisner 
51930500813fSAndreas Gruenbacher 	spin_lock_irq(&connection->resource->req_lock);
5194bde89a9eSAndreas Gruenbacher 	oc = connection->cstate;
5195bbeb641cSPhilipp Reisner 	if (oc >= C_UNCONNECTED)
5196bde89a9eSAndreas Gruenbacher 		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
5197bbeb641cSPhilipp Reisner 
51980500813fSAndreas Gruenbacher 	spin_unlock_irq(&connection->resource->req_lock);
5199360cc740SPhilipp Reisner 
5200f3dfa40aSLars Ellenberg 	if (oc == C_DISCONNECTING)
5201bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
5202360cc740SPhilipp Reisner }
5203360cc740SPhilipp Reisner 
520469a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *peer_device)
5205360cc740SPhilipp Reisner {
520669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
5207360cc740SPhilipp Reisner 	unsigned int i;
5208b411b363SPhilipp Reisner 
520985719573SPhilipp Reisner 	/* wait for current activity to cease. */
52100500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5211b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->active_ee);
5212b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->sync_ee);
5213b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->read_ee);
52140500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5215b411b363SPhilipp Reisner 
5216b411b363SPhilipp Reisner 	/* We do not have data structures that would allow us to
5217b411b363SPhilipp Reisner 	 * get the rs_pending_cnt down to 0 again.
5218b411b363SPhilipp Reisner 	 *  * On C_SYNC_TARGET we do not have any data structures describing
5219b411b363SPhilipp Reisner 	 *    the pending RSDataRequest's we have sent.
5220b411b363SPhilipp Reisner 	 *  * On C_SYNC_SOURCE there is no data structure that tracks
5221b411b363SPhilipp Reisner 	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
5222b411b363SPhilipp Reisner 	 *  And no, it is not the sum of the reference counts in the
5223b411b363SPhilipp Reisner 	 *  resync_LRU. The resync_LRU tracks the whole operation including
5224b411b363SPhilipp Reisner 	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
5225b411b363SPhilipp Reisner 	 *  on the fly. */
5226b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5227b30ab791SAndreas Gruenbacher 	device->rs_total = 0;
5228b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
5229b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_pending_cnt, 0);
5230b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
5231b411b363SPhilipp Reisner 
5232b30ab791SAndreas Gruenbacher 	del_timer_sync(&device->resync_timer);
52332bccef39SKees Cook 	resync_timer_fn(&device->resync_timer);
5234b411b363SPhilipp Reisner 
5235b411b363SPhilipp Reisner 	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
5236b411b363SPhilipp Reisner 	 * w_make_resync_request etc. which may still be on the worker queue
5237b411b363SPhilipp Reisner 	 * to be "canceled" */
5238b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5239b411b363SPhilipp Reisner 
5240b30ab791SAndreas Gruenbacher 	drbd_finish_peer_reqs(device);
5241b411b363SPhilipp Reisner 
5242d10b4ea3SPhilipp Reisner 	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
5243d10b4ea3SPhilipp Reisner 	   might have issued a work again. The one before drbd_finish_peer_reqs() is
5244d10b4ea3SPhilipp Reisner 	   necessary to reclain net_ee in drbd_finish_peer_reqs(). */
5245b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5246d10b4ea3SPhilipp Reisner 
524708332d73SLars Ellenberg 	/* need to do it again, drbd_finish_peer_reqs() may have populated it
524808332d73SLars Ellenberg 	 * again via drbd_try_clear_on_disk_bm(). */
5249b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5250b411b363SPhilipp Reisner 
5251b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
5252b30ab791SAndreas Gruenbacher 	device->p_uuid = NULL;
5253b411b363SPhilipp Reisner 
5254b30ab791SAndreas Gruenbacher 	if (!drbd_suspended(device))
525569a22773SAndreas Gruenbacher 		tl_clear(peer_device->connection);
5256b411b363SPhilipp Reisner 
5257b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
5258b411b363SPhilipp Reisner 
5259be115b69SLars Ellenberg 	if (get_ldev(device)) {
5260be115b69SLars Ellenberg 		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
5261be115b69SLars Ellenberg 				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
5262be115b69SLars Ellenberg 		put_ldev(device);
5263be115b69SLars Ellenberg 	}
526420ceb2b2SLars Ellenberg 
5265b411b363SPhilipp Reisner 	/* tcp_close and release of sendpage pages can be deferred.  I don't
5266b411b363SPhilipp Reisner 	 * want to use SO_LINGER, because apparently it can be deferred for
5267b411b363SPhilipp Reisner 	 * more than 20 seconds (longest time I checked).
5268b411b363SPhilipp Reisner 	 *
5269b411b363SPhilipp Reisner 	 * Actually we don't care for exactly when the network stack does its
5270b411b363SPhilipp Reisner 	 * put_page(), but release our reference on these pages right here.
5271b411b363SPhilipp Reisner 	 */
5272b30ab791SAndreas Gruenbacher 	i = drbd_free_peer_reqs(device, &device->net_ee);
5273b411b363SPhilipp Reisner 	if (i)
5274d0180171SAndreas Gruenbacher 		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
5275b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use_by_net);
5276435f0740SLars Ellenberg 	if (i)
5277d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
5278b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use);
5279b411b363SPhilipp Reisner 	if (i)
5280d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use = %d, expected 0\n", i);
5281b411b363SPhilipp Reisner 
52820b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->read_ee));
52830b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->active_ee));
52840b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->sync_ee));
52850b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->done_ee));
5286b411b363SPhilipp Reisner 
5287360cc740SPhilipp Reisner 	return 0;
5288b411b363SPhilipp Reisner }
5289b411b363SPhilipp Reisner 
5290b411b363SPhilipp Reisner /*
5291b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5292b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5293b411b363SPhilipp Reisner  *
5294b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5295b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5296b411b363SPhilipp Reisner  *
5297b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5298b411b363SPhilipp Reisner  */
5299bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5300b411b363SPhilipp Reisner {
53019f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
53029f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5303b411b363SPhilipp Reisner 
5304bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5305bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
53069f5bdc33SAndreas Gruenbacher 	if (!p)
5307e8d17b01SAndreas Gruenbacher 		return -EIO;
5308b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5309b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5310b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
531120c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5312bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5313b411b363SPhilipp Reisner }
5314b411b363SPhilipp Reisner 
5315b411b363SPhilipp Reisner /*
5316b411b363SPhilipp Reisner  * return values:
5317b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5318b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5319b411b363SPhilipp Reisner  *  -1 peer talks different language,
5320b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5321b411b363SPhilipp Reisner  */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Announce our own protocol range and feature flags first. */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	/* The first packet from the peer must be its feature announcement. */
	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert the advertised protocol range to host byte order. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* A protocol_max of 0 is treated as "only protocol_min supported";
	 * presumably sent by peers that predate the field — TODO confirm. */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* Our supported range and the peer's range must overlap. */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest common protocol version and on the
	 * intersection of both sides' feature flags. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5387b411b363SPhilipp Reisner 
5388b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: authentication can
 * never succeed, so return -1 ("auth failed, don't try again") and tell
 * the admin how to resolve the configuration conflict. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Fix message typo: "was build without" -> "was built without". */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5395b411b363SPhilipp Reisner #else
5396b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5397b10d96cbSJohannes Thoma 
5398b10d96cbSJohannes Thoma /* Return value:
5399b10d96cbSJohannes Thoma 	1 - auth succeeded,
5400b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5401b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5402b10d96cbSJohannes Thoma */
5403b10d96cbSJohannes Thoma 
/* Challenge/response (CRAM-HMAC) authentication over the data socket,
 * keyed with the configured shared secret. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* net_conf is RCU protected; copy the secret out under the lock. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* A shash_desc carries trailing tfm-specific context; size it so. */
	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our random challenge on the data socket. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;	/* network trouble: retryable */
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* The peer must answer with its own challenge... */
	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;	/* protocol violation: permanent failure */
		goto fail;
	}

	/* ...within sane size bounds before we allocate/receive it. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (!peers_ch) {
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Refuse a reflected challenge (peer echoing ours back at us). */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Our response: HMAC over the peer's challenge. */
	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (!response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* Reuse the response buffer for the peer's answer. */
	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (!right_response) {
		rv = -1;
		goto fail;
	}

	/* What the peer *should* have sent: HMAC over our challenge. */
	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* rv = 1 on match (auth OK), otherwise -1 below. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	if (desc) {
		/* Wipe the keyed hash state before freeing it. */
		shash_desc_zero(desc);
		kfree(desc);
	}

	return rv;
}
5579b411b363SPhilipp Reisner #endif
5580b411b363SPhilipp Reisner 
55818fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5582b411b363SPhilipp Reisner {
5583bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5584b411b363SPhilipp Reisner 	int h;
5585b411b363SPhilipp Reisner 
55861ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5587b411b363SPhilipp Reisner 
5588b411b363SPhilipp Reisner 	do {
5589bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5590b411b363SPhilipp Reisner 		if (h == 0) {
5591bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
559220ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5593b411b363SPhilipp Reisner 		}
5594b411b363SPhilipp Reisner 		if (h == -1) {
55951ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5596bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5597b411b363SPhilipp Reisner 		}
5598b411b363SPhilipp Reisner 	} while (h == 0);
5599b411b363SPhilipp Reisner 
5600c51a0ef3SLars Ellenberg 	if (h > 0) {
5601c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5602bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5603c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5604c51a0ef3SLars Ellenberg 	}
5605b411b363SPhilipp Reisner 
5606bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5607b411b363SPhilipp Reisner 
56081ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5609b411b363SPhilipp Reisner 	return 0;
5610b411b363SPhilipp Reisner }
5611b411b363SPhilipp Reisner 
5612b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5613b411b363SPhilipp Reisner 
5614bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5615b411b363SPhilipp Reisner {
5616e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5617b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5618b411b363SPhilipp Reisner 
5619b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5620bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5621b411b363SPhilipp Reisner 	} else {
5622bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
56231ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5624fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5625fc3b10a4SPhilipp Reisner 	}
5626bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5627e4f78edeSPhilipp Reisner 
56282735a594SAndreas Gruenbacher 	return 0;
5629fc3b10a4SPhilipp Reisner }
5630e4f78edeSPhilipp Reisner 
/* Reply to a per-volume state change request: record success/failure on
 * the device and wake waiters on state_wait. */
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* If a connection-wide state change is pending, this reply belongs
	 * to it; only pre-100 peers do it this way (see D_ASSERT). */
	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}
5659b411b363SPhilipp Reisner 
/* A ping arrived: answer immediately with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}
5665b411b363SPhilipp Reisner 
5666bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5667b411b363SPhilipp Reisner {
5668b411b363SPhilipp Reisner 	/* restore idle timeout */
5669bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5670bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5671bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5672b411b363SPhilipp Reisner 
56732735a594SAndreas Gruenbacher 	return 0;
5674b411b363SPhilipp Reisner }
5675b411b363SPhilipp Reisner 
/* The peer reports a resync block as already identical (no data needed;
 * only peers with protocol >= 89 send this, see the D_ASSERT below):
 * mark it in sync and account it in the resync statistics. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* rs_sect_in is accounted in 512 byte sectors (hence >> 9). */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5705b411b363SPhilipp Reisner 
/* Look up the request identified by @id/@sector in the tree @root (under
 * the resource's req_lock) and feed the request-state event @what into it.
 * If that transition completed the master bio, complete it outside the
 * lock.  Returns 0 on success, -EIO when find_request() could not supply
 * a request (subject to @missing_ok). */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* Complete the master bio only after dropping req_lock. */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5727b411b363SPhilipp Reisner 
/* Positive acknowledgment for a write: map the ack packet type onto the
 * matching request-state event and apply it.  An ID_SYNCER block_id marks
 * resync traffic, which has no request object to update. */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Resync write ack: just mark in sync and drop the pending count. */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		/* The dispatch table routes only the cases above here. */
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5773b411b363SPhilipp Reisner 
/* Negative acknowledgment: the peer could not execute a write.  For
 * resync traffic (ID_SYNCER), account the failed resync I/O; otherwise
 * feed NEG_ACKED into the matching write request. */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	/* missing_ok = true: the request may legitimately be gone already,
	 * see the protocol A/B notes below. */
	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5809b411b363SPhilipp Reisner 
/* Negative reply to a data (read) request: log it and fail the matching
 * entry in the read_requests tree via NEG_ACKED. */
static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
	    (unsigned long long)sector, be32_to_cpu(p->blksize));

	/* missing_ok = false: a read request must still be tracked here. */
	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}
5831b411b363SPhilipp Reisner 
/* Negative reply to a resync data request.  P_NEG_RS_DREPLY means the
 * peer failed the request (record failed resync I/O); P_RS_CANCEL means
 * it merely canceled it (no failure accounting). */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	/* Touch the local disk state only while we can still get the ldev. */
	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			break;
		case P_RS_CANCEL:
			break;
		default:
			/* Only the two commands above are routed here. */
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5868b411b363SPhilipp Reisner 
/* Barrier ack from the peer: release the acknowledged requests from the
 * transfer log.  Additionally, any volume sitting in Ahead mode with no
 * application writes in flight gets its resync timer armed (once, guarded
 * by AHEAD_TO_SYNC_SOURCE). */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* peer_devices idr is walked under RCU. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5892b411b363SPhilipp Reisner 
/* Result for one block of an online verify run: record whether it was
 * out of sync, update progress, and queue the finish work when the last
 * block has been processed. */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	/* Without the local disk there is nothing left to account. */
	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	/* Last block verified: hand the finish-up to the sender work queue;
	 * on allocation failure, finish synchronously as best effort. */
	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5944b411b363SPhilipp Reisner 
/* Handler for packets we deliberately ignore on the meta socket
 * (e.g. P_DELAY_PROBE, see ack_receiver_tbl): the payload has already
 * been received; just report success so the receive loop continues. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
59490ced55a3SPhilipp Reisner 
/* One entry of the ack receiver dispatch table: expected payload size
 * (header not included) and the handler to call for that packet type. */
struct meta_sock_cmd {
	size_t pkt_size;	/* payload bytes following the header */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5954b411b363SPhilipp Reisner 
5955668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5956668700b4SPhilipp Reisner {
5957668700b4SPhilipp Reisner 	long t;
5958668700b4SPhilipp Reisner 	struct net_conf *nc;
5959668700b4SPhilipp Reisner 
5960668700b4SPhilipp Reisner 	rcu_read_lock();
5961668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5962668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5963668700b4SPhilipp Reisner 	rcu_read_unlock();
5964668700b4SPhilipp Reisner 
5965668700b4SPhilipp Reisner 	t *= HZ;
5966668700b4SPhilipp Reisner 	if (ping_timeout)
5967668700b4SPhilipp Reisner 		t /= 10;
5968668700b4SPhilipp Reisner 
5969668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5970668700b4SPhilipp Reisner }
5971668700b4SPhilipp Reisner 
5972668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5973668700b4SPhilipp Reisner {
5974668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5975668700b4SPhilipp Reisner }
5976668700b4SPhilipp Reisner 
5977668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5978668700b4SPhilipp Reisner {
5979668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5980668700b4SPhilipp Reisner }
5981668700b4SPhilipp Reisner 
5982668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5983e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5984e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5985b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5986b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5987b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5988d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5989b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5990b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5991b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5992b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5993b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5994b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5995b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
599602918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5997d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
59981952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
59991952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
6000b411b363SPhilipp Reisner };
6001b411b363SPhilipp Reisner 
60021c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
6003b411b363SPhilipp Reisner {
6004bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
6005668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
600677351055SPhilipp Reisner 	struct packet_info pi;
6007668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
6008257d0af6SPhilipp Reisner 	int rv;
6009bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
6010b411b363SPhilipp Reisner 	int received = 0;
6011bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
601252b061a4SAndreas Gruenbacher 	int expect   = header_size;
601344ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
6014b411b363SPhilipp Reisner 
60158b700983SPeter Zijlstra 	sched_set_fifo_low(current);
6016b411b363SPhilipp Reisner 
6017e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
601880822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
601944ed167dSPhilipp Reisner 
6020668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
602144ed167dSPhilipp Reisner 
6022bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
6023bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
60241ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
6025841ce241SAndreas Gruenbacher 				goto reconnect;
6026841ce241SAndreas Gruenbacher 			}
6027668700b4SPhilipp Reisner 			set_ping_timeout(connection);
602844ed167dSPhilipp Reisner 			ping_timeout_active = true;
6029b411b363SPhilipp Reisner 		}
6030b411b363SPhilipp Reisner 
6031668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
6032bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
6033b411b363SPhilipp Reisner 
6034b411b363SPhilipp Reisner 		/* Note:
6035b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
6036b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
6037b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
6038b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
6039b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
6040b411b363SPhilipp Reisner 		 * rv == expected: full header or command
6041b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
6042b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
6043b411b363SPhilipp Reisner 		 */
6044b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
6045b411b363SPhilipp Reisner 			received += rv;
6046b411b363SPhilipp Reisner 			buf	 += rv;
6047b411b363SPhilipp Reisner 		} else if (rv == 0) {
6048bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6049b66623e3SPhilipp Reisner 				long t;
6050b66623e3SPhilipp Reisner 				rcu_read_lock();
6051bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6052b66623e3SPhilipp Reisner 				rcu_read_unlock();
6053b66623e3SPhilipp Reisner 
6054bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
6055bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6056b66623e3SPhilipp Reisner 						       t);
6057599377acSPhilipp Reisner 				if (t)
6058599377acSPhilipp Reisner 					break;
6059599377acSPhilipp Reisner 			}
60601ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6061b411b363SPhilipp Reisner 			goto reconnect;
6062b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6063cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6064cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6065668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6066cb6518cbSLars Ellenberg 				continue;
6067f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60681ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6069b411b363SPhilipp Reisner 				goto reconnect;
6070b411b363SPhilipp Reisner 			}
6071bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6072b411b363SPhilipp Reisner 			continue;
6073b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6074668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6075668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6076668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6077668700b4SPhilipp Reisner 			flush_signals(current);
6078b411b363SPhilipp Reisner 			continue;
6079b411b363SPhilipp Reisner 		} else {
60801ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6081b411b363SPhilipp Reisner 			goto reconnect;
6082b411b363SPhilipp Reisner 		}
6083b411b363SPhilipp Reisner 
6084b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6085bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6086b411b363SPhilipp Reisner 				goto reconnect;
6087668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6088668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60891ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60902fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6091b411b363SPhilipp Reisner 				goto disconnect;
6092b411b363SPhilipp Reisner 			}
6093e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
609452b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
60951ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
609677351055SPhilipp Reisner 					pi.cmd, pi.size);
6097b411b363SPhilipp Reisner 				goto reconnect;
6098b411b363SPhilipp Reisner 			}
6099257d0af6SPhilipp Reisner 		}
6100b411b363SPhilipp Reisner 		if (received == expect) {
61012735a594SAndreas Gruenbacher 			bool err;
6102a4fbda8eSPhilipp Reisner 
6103bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
61042735a594SAndreas Gruenbacher 			if (err) {
6105d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6106b411b363SPhilipp Reisner 				goto reconnect;
61071952e916SAndreas Gruenbacher 			}
6108b411b363SPhilipp Reisner 
6109bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6110f36af18cSLars Ellenberg 
6111668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6112668700b4SPhilipp Reisner 				set_idle_timeout(connection);
611344ed167dSPhilipp Reisner 				ping_timeout_active = false;
611444ed167dSPhilipp Reisner 			}
6115b411b363SPhilipp Reisner 
6116bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6117b411b363SPhilipp Reisner 			received = 0;
611852b061a4SAndreas Gruenbacher 			expect	 = header_size;
6119b411b363SPhilipp Reisner 			cmd	 = NULL;
6120b411b363SPhilipp Reisner 		}
6121b411b363SPhilipp Reisner 	}
6122b411b363SPhilipp Reisner 
6123b411b363SPhilipp Reisner 	if (0) {
6124b411b363SPhilipp Reisner reconnect:
6125bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6126bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6127b411b363SPhilipp Reisner 	}
6128b411b363SPhilipp Reisner 	if (0) {
6129b411b363SPhilipp Reisner disconnect:
6130bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6131b411b363SPhilipp Reisner 	}
6132b411b363SPhilipp Reisner 
6133668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6134b411b363SPhilipp Reisner 
6135b411b363SPhilipp Reisner 	return 0;
6136b411b363SPhilipp Reisner }
6137668700b4SPhilipp Reisner 
6138668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6139668700b4SPhilipp Reisner {
6140668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6141668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6142668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6143668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6144668700b4SPhilipp Reisner 	struct net_conf *nc;
6145668700b4SPhilipp Reisner 	int tcp_cork, err;
6146668700b4SPhilipp Reisner 
6147668700b4SPhilipp Reisner 	rcu_read_lock();
6148668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6149668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6150668700b4SPhilipp Reisner 	rcu_read_unlock();
6151668700b4SPhilipp Reisner 
6152668700b4SPhilipp Reisner 	if (tcp_cork)
6153db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, true);
6154668700b4SPhilipp Reisner 
6155668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6156668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6157668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6158668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6159668700b4SPhilipp Reisner 
6160668700b4SPhilipp Reisner 	if (err) {
6161668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6162668700b4SPhilipp Reisner 		return;
6163668700b4SPhilipp Reisner 	}
6164668700b4SPhilipp Reisner 
6165668700b4SPhilipp Reisner 	if (tcp_cork)
6166db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, false);
6167668700b4SPhilipp Reisner 
6168668700b4SPhilipp Reisner 	return;
6169668700b4SPhilipp Reisner }
6170