1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
37b411b363SPhilipp Reisner #include "drbd_int.h"
38a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner #include "drbd_vli.h"
41b411b363SPhilipp Reisner 
42f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4320c68fdeSLars Ellenberg 
/* Result of decoding a received packet header. */
struct packet_info {
	enum drbd_packet cmd;	/* protocol command id */
	unsigned int size;	/* size of the packet's payload, in bytes */
	unsigned int vnr;	/* volume number the packet refers to */
	void *data;		/* packet data (opaque here; filled in by the header decoder) */
};
5077351055SPhilipp Reisner 
/* Possible outcomes when trying to finish an epoch
 * (see drbd_may_finish_epoch()). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
56b411b363SPhilipp Reisner 
57bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
58bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5969a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
60a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
61bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6299920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
66b411b363SPhilipp Reisner 
6745bb912bSLars Ellenberg /*
6845bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6945bb912bSLars Ellenberg  * page->private being our "next" pointer.
7045bb912bSLars Ellenberg  */
7145bb912bSLars Ellenberg 
7245bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7345bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
7445bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
7545bb912bSLars Ellenberg  */
7645bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
7745bb912bSLars Ellenberg {
7845bb912bSLars Ellenberg 	struct page *page;
7945bb912bSLars Ellenberg 	struct page *tmp;
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg 	BUG_ON(!n);
8245bb912bSLars Ellenberg 	BUG_ON(!head);
8345bb912bSLars Ellenberg 
8445bb912bSLars Ellenberg 	page = *head;
8523ce4227SPhilipp Reisner 
8623ce4227SPhilipp Reisner 	if (!page)
8723ce4227SPhilipp Reisner 		return NULL;
8823ce4227SPhilipp Reisner 
8945bb912bSLars Ellenberg 	while (page) {
9045bb912bSLars Ellenberg 		tmp = page_chain_next(page);
9145bb912bSLars Ellenberg 		if (--n == 0)
9245bb912bSLars Ellenberg 			break; /* found sufficient pages */
9345bb912bSLars Ellenberg 		if (tmp == NULL)
9445bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
9545bb912bSLars Ellenberg 			return NULL;
9645bb912bSLars Ellenberg 		page = tmp;
9745bb912bSLars Ellenberg 	}
9845bb912bSLars Ellenberg 
9945bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10045bb912bSLars Ellenberg 	set_page_private(page, 0);
10145bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
10245bb912bSLars Ellenberg 	page = *head;
10345bb912bSLars Ellenberg 	*head = tmp;
10445bb912bSLars Ellenberg 	return page;
10545bb912bSLars Ellenberg }
10645bb912bSLars Ellenberg 
10745bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10845bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10945bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11045bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11145bb912bSLars Ellenberg {
11245bb912bSLars Ellenberg 	struct page *tmp;
11345bb912bSLars Ellenberg 	int i = 1;
114e8628013SJoe Perches 	while ((tmp = page_chain_next(page))) {
115e8628013SJoe Perches 		++i;
116e8628013SJoe Perches 		page = tmp;
117e8628013SJoe Perches 	}
11845bb912bSLars Ellenberg 	if (len)
11945bb912bSLars Ellenberg 		*len = i;
12045bb912bSLars Ellenberg 	return page;
12145bb912bSLars Ellenberg }
12245bb912bSLars Ellenberg 
12345bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12445bb912bSLars Ellenberg {
12545bb912bSLars Ellenberg 	struct page *tmp;
12645bb912bSLars Ellenberg 	int i = 0;
12745bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12845bb912bSLars Ellenberg 		put_page(page);
12945bb912bSLars Ellenberg 		++i;
13045bb912bSLars Ellenberg 	}
13145bb912bSLars Ellenberg 	return i;
13245bb912bSLars Ellenberg }
13345bb912bSLars Ellenberg 
13445bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13545bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13645bb912bSLars Ellenberg {
13745bb912bSLars Ellenberg #if 1
13845bb912bSLars Ellenberg 	struct page *tmp;
13945bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14045bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14145bb912bSLars Ellenberg #endif
14245bb912bSLars Ellenberg 
14345bb912bSLars Ellenberg 	/* add chain to head */
14445bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14545bb912bSLars Ellenberg 	*head = chain_first;
14645bb912bSLars Ellenberg }
14745bb912bSLars Ellenberg 
148b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device,
14918c2d522SAndreas Gruenbacher 				       unsigned int number)
150b411b363SPhilipp Reisner {
151b411b363SPhilipp Reisner 	struct page *page = NULL;
15245bb912bSLars Ellenberg 	struct page *tmp = NULL;
15318c2d522SAndreas Gruenbacher 	unsigned int i = 0;
154b411b363SPhilipp Reisner 
155b411b363SPhilipp Reisner 	/* Yes, testing drbd_pp_vacant outside the lock is racy.
156b411b363SPhilipp Reisner 	 * So what. It saves a spin_lock. */
15745bb912bSLars Ellenberg 	if (drbd_pp_vacant >= number) {
158b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
15945bb912bSLars Ellenberg 		page = page_chain_del(&drbd_pp_pool, number);
16045bb912bSLars Ellenberg 		if (page)
16145bb912bSLars Ellenberg 			drbd_pp_vacant -= number;
162b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
16345bb912bSLars Ellenberg 		if (page)
16445bb912bSLars Ellenberg 			return page;
165b411b363SPhilipp Reisner 	}
16645bb912bSLars Ellenberg 
167b411b363SPhilipp Reisner 	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
168b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
169b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
17045bb912bSLars Ellenberg 	for (i = 0; i < number; i++) {
17145bb912bSLars Ellenberg 		tmp = alloc_page(GFP_TRY);
17245bb912bSLars Ellenberg 		if (!tmp)
17345bb912bSLars Ellenberg 			break;
17445bb912bSLars Ellenberg 		set_page_private(tmp, (unsigned long)page);
17545bb912bSLars Ellenberg 		page = tmp;
17645bb912bSLars Ellenberg 	}
17745bb912bSLars Ellenberg 
17845bb912bSLars Ellenberg 	if (i == number)
179b411b363SPhilipp Reisner 		return page;
18045bb912bSLars Ellenberg 
18145bb912bSLars Ellenberg 	/* Not enough pages immediately available this time.
182c37c8ecfSAndreas Gruenbacher 	 * No need to jump around here, drbd_alloc_pages will retry this
18345bb912bSLars Ellenberg 	 * function "soon". */
18445bb912bSLars Ellenberg 	if (page) {
18545bb912bSLars Ellenberg 		tmp = page_chain_tail(page, NULL);
18645bb912bSLars Ellenberg 		spin_lock(&drbd_pp_lock);
18745bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
18845bb912bSLars Ellenberg 		drbd_pp_vacant += i;
18945bb912bSLars Ellenberg 		spin_unlock(&drbd_pp_lock);
19045bb912bSLars Ellenberg 	}
19145bb912bSLars Ellenberg 	return NULL;
192b411b363SPhilipp Reisner }
193b411b363SPhilipp Reisner 
194b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
195a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
196b411b363SPhilipp Reisner {
197a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *tmp;
198b411b363SPhilipp Reisner 
199b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
200b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
201b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
202b411b363SPhilipp Reisner 	   stop to examine the list... */
203b411b363SPhilipp Reisner 
204a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
205045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
206b411b363SPhilipp Reisner 			break;
207a8cd15baSAndreas Gruenbacher 		list_move(&peer_req->w.list, to_be_freed);
208b411b363SPhilipp Reisner 	}
209b411b363SPhilipp Reisner }
210b411b363SPhilipp Reisner 
211668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
212b411b363SPhilipp Reisner {
213b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
214db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
215b411b363SPhilipp Reisner 
2160500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
217b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2180500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
219a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
220b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
221b411b363SPhilipp Reisner }
222b411b363SPhilipp Reisner 
/* Reclaim net_ee entries (pages lent to the network stack) on every
 * volume of @connection that still has such pages outstanding. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		/* drbd_reclaim_net_peer_reqs() may sleep (it frees peer
		 * requests; see might_sleep() in __drbd_free_peer_req), so
		 * drop the RCU read lock while working on this device; the
		 * kref keeps the device alive across the gap. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
242668700b4SPhilipp Reisner 
243b411b363SPhilipp Reisner /**
244c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
2459b48ff07SLee Jones  * @peer_device:	DRBD device.
24645bb912bSLars Ellenberg  * @number:		number of pages requested
24745bb912bSLars Ellenberg  * @retry:		whether to retry, if not enough pages are available right now
248b411b363SPhilipp Reisner  *
24945bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2500e49d7b0SLars Ellenberg  * the kernel.
25145bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25245bb912bSLars Ellenberg  *
2530e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2540e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2550e49d7b0SLars Ellenberg  *
2560e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2570e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2580e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2590e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2600e49d7b0SLars Ellenberg  *
26145bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
262b411b363SPhilipp Reisner  */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;	/* no net_conf: effectively unlimited */
	rcu_read_unlock();

	/* fast path: below the max-buffers throttle, just try once */
	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lent to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	/* slow path: reclaim and retry, sleeping between attempts */
	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* If we slept the full timeout without being woken up, stop
		 * honoring max-buffers: it is a throttle, not a hard limit
		 * (see function comment above). */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages we hand out against pp_in_use */
	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
313b411b363SPhilipp Reisner 
314c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3150500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31645bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31745bb912bSLars Ellenberg  * or returns all pages to the system. */
318b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
319b411b363SPhilipp Reisner {
320b30ab791SAndreas Gruenbacher 	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
321b411b363SPhilipp Reisner 	int i;
322435f0740SLars Ellenberg 
323a73ff323SLars Ellenberg 	if (page == NULL)
324a73ff323SLars Ellenberg 		return;
325a73ff323SLars Ellenberg 
326183ece30SRoland Kammerer 	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32745bb912bSLars Ellenberg 		i = page_chain_free(page);
32845bb912bSLars Ellenberg 	else {
32945bb912bSLars Ellenberg 		struct page *tmp;
33045bb912bSLars Ellenberg 		tmp = page_chain_tail(page, &i);
331b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
33245bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
33345bb912bSLars Ellenberg 		drbd_pp_vacant += i;
334b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
335b411b363SPhilipp Reisner 	}
336435f0740SLars Ellenberg 	i = atomic_sub_return(i, a);
33745bb912bSLars Ellenberg 	if (i < 0)
338d0180171SAndreas Gruenbacher 		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
339435f0740SLars Ellenberg 			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
340b411b363SPhilipp Reisner 	wake_up(&drbd_pp_wait);
341b411b363SPhilipp Reisner }
342b411b363SPhilipp Reisner 
343b411b363SPhilipp Reisner /*
344b411b363SPhilipp Reisner You need to hold the req_lock:
345b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
346b411b363SPhilipp Reisner 
347b411b363SPhilipp Reisner You must not have the req_lock:
3483967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3490db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3507721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
351b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
352a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
353b411b363SPhilipp Reisner  drbd_clear_done_ee()
354b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
355b411b363SPhilipp Reisner */
356b411b363SPhilipp Reisner 
3579104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size)
3589104d31aSLars Ellenberg  * w_same: payload_size == logical_block_size
3599104d31aSLars Ellenberg  * trim: payload_size == 0 */
360f6ffca9fSAndreas Gruenbacher struct drbd_peer_request *
36169a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
3629104d31aSLars Ellenberg 		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
363b411b363SPhilipp Reisner {
36469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
365db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
366a73ff323SLars Ellenberg 	struct page *page = NULL;
3679104d31aSLars Ellenberg 	unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
368b411b363SPhilipp Reisner 
369b30ab791SAndreas Gruenbacher 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
370b411b363SPhilipp Reisner 		return NULL;
371b411b363SPhilipp Reisner 
3720892fac8SKent Overstreet 	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
373db830c46SAndreas Gruenbacher 	if (!peer_req) {
374b411b363SPhilipp Reisner 		if (!(gfp_mask & __GFP_NOWARN))
375d0180171SAndreas Gruenbacher 			drbd_err(device, "%s: allocation failed\n", __func__);
376b411b363SPhilipp Reisner 		return NULL;
377b411b363SPhilipp Reisner 	}
378b411b363SPhilipp Reisner 
3799104d31aSLars Ellenberg 	if (nr_pages) {
380d0164adcSMel Gorman 		page = drbd_alloc_pages(peer_device, nr_pages,
381d0164adcSMel Gorman 					gfpflags_allow_blocking(gfp_mask));
38245bb912bSLars Ellenberg 		if (!page)
38345bb912bSLars Ellenberg 			goto fail;
384a73ff323SLars Ellenberg 	}
385b411b363SPhilipp Reisner 
386c5a2c150SLars Ellenberg 	memset(peer_req, 0, sizeof(*peer_req));
387c5a2c150SLars Ellenberg 	INIT_LIST_HEAD(&peer_req->w.list);
388db830c46SAndreas Gruenbacher 	drbd_clear_interval(&peer_req->i);
3899104d31aSLars Ellenberg 	peer_req->i.size = request_size;
390db830c46SAndreas Gruenbacher 	peer_req->i.sector = sector;
391c5a2c150SLars Ellenberg 	peer_req->submit_jif = jiffies;
392a8cd15baSAndreas Gruenbacher 	peer_req->peer_device = peer_device;
393db830c46SAndreas Gruenbacher 	peer_req->pages = page;
3949a8e7753SAndreas Gruenbacher 	/*
3959a8e7753SAndreas Gruenbacher 	 * The block_id is opaque to the receiver.  It is not endianness
3969a8e7753SAndreas Gruenbacher 	 * converted, and sent back to the sender unchanged.
3979a8e7753SAndreas Gruenbacher 	 */
398db830c46SAndreas Gruenbacher 	peer_req->block_id = id;
399b411b363SPhilipp Reisner 
400db830c46SAndreas Gruenbacher 	return peer_req;
401b411b363SPhilipp Reisner 
40245bb912bSLars Ellenberg  fail:
4030892fac8SKent Overstreet 	mempool_free(peer_req, &drbd_ee_mempool);
404b411b363SPhilipp Reisner 	return NULL;
405b411b363SPhilipp Reisner }
406b411b363SPhilipp Reisner 
/* Free a peer request: its digest (if any), its page chain, and the
 * request object itself.  @is_net selects the in-use counter the pages
 * are accounted against (pp_in_use_by_net vs pp_in_use).  May sleep. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* EE_CALL_AL_COMPLETE_IO still set here is unexpected (expect()
	 * logs it); complete the activity log reference anyway rather
	 * than leaking it. */
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, &drbd_ee_mempool);
}
422b411b363SPhilipp Reisner 
423b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
424b411b363SPhilipp Reisner {
425b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
426db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
427b411b363SPhilipp Reisner 	int count = 0;
428b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
429b411b363SPhilipp Reisner 
4300500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
431b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4320500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
433b411b363SPhilipp Reisner 
434a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
435b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
436b411b363SPhilipp Reisner 		count++;
437b411b363SPhilipp Reisner 	}
438b411b363SPhilipp Reisner 	return count;
439b411b363SPhilipp Reisner }
440b411b363SPhilipp Reisner 
441b411b363SPhilipp Reisner /*
442a990be46SAndreas Gruenbacher  * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
443b411b363SPhilipp Reisner  */
/* Run the completion callbacks of all done_ee entries, and reclaim
 * finished net_ee entries along the way.
 * Returns 0, or the first error reported by a callback. */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	/* detach both batches under the lock, process them outside of it */
	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* keep only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
476b411b363SPhilipp Reisner 
/* Wait until @head is empty.  Must be called with the req_lock held;
 * the lock is dropped while sleeping and re-acquired before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
492b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes and releases the req_lock
 * itself.  Must be called without the req_lock held. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
500b411b363SPhilipp Reisner 
/* Receive up to @size bytes from @sock into @buf.
 * Returns the number of bytes received, or a negative error code. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		/* default: block until all @size bytes arrived, no SIGPIPE */
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
	return sock_recvmsg(sock, &msg, msg.msg_flags);
}
513b411b363SPhilipp Reisner 
/* Receive exactly @size bytes on the connection's data socket.
 * Returns the number of bytes received (possibly short) or a negative
 * error code.  Unless the shutdown was expected (see below), a short
 * receive also pushes the connection towards C_BROKEN_PIPE. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			/* We initiated the disconnect ourselves: give the
			 * state machine up to ping-timeout to leave
			 * C_WF_REPORT_PARAMS; if it does, skip both the
			 * log message and the C_BROKEN_PIPE transition. */
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
546b411b363SPhilipp Reisner 
547bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
548c6967746SAndreas Gruenbacher {
549c6967746SAndreas Gruenbacher 	int err;
550c6967746SAndreas Gruenbacher 
551bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
552c6967746SAndreas Gruenbacher 	if (err != size) {
553c6967746SAndreas Gruenbacher 		if (err >= 0)
554c6967746SAndreas Gruenbacher 			err = -EIO;
555c6967746SAndreas Gruenbacher 	} else
556c6967746SAndreas Gruenbacher 		err = 0;
557c6967746SAndreas Gruenbacher 	return err;
558c6967746SAndreas Gruenbacher }
559c6967746SAndreas Gruenbacher 
560bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
561a5c31904SAndreas Gruenbacher {
562a5c31904SAndreas Gruenbacher 	int err;
563a5c31904SAndreas Gruenbacher 
564bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
565a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5661ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
567a5c31904SAndreas Gruenbacher 	return err;
568a5c31904SAndreas Gruenbacher }
569a5c31904SAndreas Gruenbacher 
5705dbf1673SLars Ellenberg /* quoting tcp(7):
5715dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5725dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5735dbf1673SLars Ellenberg  * This is our wrapper to do so.
5745dbf1673SLars Ellenberg  */
5755dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5765dbf1673SLars Ellenberg 		unsigned int rcv)
5775dbf1673SLars Ellenberg {
5785dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5795dbf1673SLars Ellenberg 	if (snd) {
5805dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5815dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5825dbf1673SLars Ellenberg 	}
5835dbf1673SLars Ellenberg 	if (rcv) {
5845dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5855dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5865dbf1673SLars Ellenberg 	}
5875dbf1673SLars Ellenberg }
5885dbf1673SLars Ellenberg 
/*
 * drbd_try_connect() - make one outgoing connection attempt to the peer
 *
 * Snapshots sndbuf/rcvbuf sizes and the connect timeout from net_conf
 * under RCU, creates a TCP socket, explicitly binds it to the configured
 * source address (port forced to 0 so the kernel picks a free one), and
 * tries to connect to the configured peer address.
 *
 * Returns the connected socket, or NULL if the attempt failed.  Failures
 * that just mean "peer not reachable (yet)" or "interrupted" are
 * tolerated; anything unexpected forces the connection state to
 * C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;	/* names the failing step for the error log */
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;	/* no network config (anymore) */
	}
	/* copy out what we need; nc must not be used after rcu_read_unlock() */
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* clear the port in our local copy only; we bind with an ephemeral port */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* buffer sizes must be set before connect(), see tcp(7) and
	 * the comment on drbd_setbufsize() */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 *  for the outgoing connections.
	 *  This is needed for multihomed hosts and to be
	 *  able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		/* classify the error: transient conditions keep us retrying */
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
676b411b363SPhilipp Reisner 
/* Context handed to the listen socket's sk_state_change callback, so an
 * incoming connection can wake the waiter in drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* our listening socket */
	struct completion door_bell;	/* completed on TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, chained to */

};
6847a426fd8SPhilipp Reisner 
/* sk_state_change callback installed on the listen socket by
 * prepare_listen_socket(): ring the door bell once a connection reaches
 * TCP_ESTABLISHED, then always chain to the saved original callback.
 * NOTE(review): installed/removed under write_lock_bh(sk_callback_lock),
 * so this presumably runs in (soft)irq-safe context -- keep it short. */
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	/* chain to the previous callback unconditionally */
	state_change(sk);
}
6957a426fd8SPhilipp Reisner 
/*
 * prepare_listen_socket() - create, bind and listen on the configured address
 *
 * Also redirects the listen socket's sk_state_change callback to
 * drbd_incoming_connection() (saving the original in @ad), so that an
 * incoming connection completes @ad->door_bell for drbd_wait_for_connect().
 *
 * On success the listening socket is stored in @ad->s_listen and 0 is
 * returned.  On failure -EIO is returned; unexpected errors additionally
 * force the connection state to C_DISCONNECTING.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;	/* names the failing step for the error log */

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;	/* no network config (anymore) */
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* buffer sizes must be set before listen(), see tcp(7) */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* install our state-change hook under the socket callback lock,
	 * saving the original so it can be chained to and later restored */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* transient conditions are not worth a log entry or teardown */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7581f3e509bSPhilipp Reisner 
/* Restore the original sk_state_change callback and clear sk_user_data,
 * undoing the hook installed by prepare_listen_socket().  Must use the
 * same sk_callback_lock discipline as the install side. */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
766715306f6SAndreas Gruenbacher 
/*
 * drbd_wait_for_connect() - wait for an incoming connection on our listen socket
 *
 * Sleeps interruptibly on @ad->door_bell for up to connect_int seconds
 * (+/- ~14% random jitter, so both peers don't retry in lockstep), then
 * accepts the pending connection, if any.  The temporary state-change
 * hook is removed from the accepted socket before it is returned.
 *
 * Returns the accepted socket, or NULL on timeout, signal, missing
 * network config, or accept failure.
 */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;	/* no network config (anymore) */
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;	/* timed out or interrupted */

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		/* transient conditions are not worth a log entry or teardown */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
803b411b363SPhilipp Reisner 
804bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
805b411b363SPhilipp Reisner 
806bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8079f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8089f5bdc33SAndreas Gruenbacher {
809bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8109f5bdc33SAndreas Gruenbacher 		return -EIO;
811bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
/*
 * receive_first_packet() - read and decode the peer's initial packet
 *
 * Sets the receive timeout to 4x ping_timeo (configured in tenths of a
 * second, hence the * HZ / 10) before reading exactly one header.
 *
 * Returns the decoded command (e.g. P_INITIAL_DATA or P_INITIAL_META),
 * or a negative error code on short read / decode failure / missing
 * network config.
 */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;	/* no network config (anymore) */
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;	/* map a short read to -EIO */
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
841b411b363SPhilipp Reisner 
842b411b363SPhilipp Reisner /**
843b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
844b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
845b411b363SPhilipp Reisner  */
8465d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
847b411b363SPhilipp Reisner {
848b411b363SPhilipp Reisner 	int rr;
849b411b363SPhilipp Reisner 	char tb[4];
850b411b363SPhilipp Reisner 
851b411b363SPhilipp Reisner 	if (!*sock)
85281e84650SAndreas Gruenbacher 		return false;
853b411b363SPhilipp Reisner 
854dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
855b411b363SPhilipp Reisner 
856b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85781e84650SAndreas Gruenbacher 		return true;
858b411b363SPhilipp Reisner 	} else {
859b411b363SPhilipp Reisner 		sock_release(*sock);
860b411b363SPhilipp Reisner 		*sock = NULL;
86181e84650SAndreas Gruenbacher 		return false;
862b411b363SPhilipp Reisner 	}
863b411b363SPhilipp Reisner }
8645d0b17f1SPhilipp Reisner 
/*
 * connection_established() - check that both sockets of a connection are usable
 *
 * Waits sock_check_timeo (or ping_timeo if that is unset; both in tenths
 * of a second) to give a crossed connection attempt from the peer time to
 * arrive, then probes both sockets with drbd_socket_okay(), which releases
 * dead ones and NULLs the pointers.
 *
 * NOTE(review): unlike the other readers of connection->net_conf in this
 * file, nc is dereferenced here without a NULL check -- presumably the
 * callers guarantee the config still exists at this point; confirm before
 * relying on that.
 */
static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	/* probe both; note both probes must run even if the first fails */
	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}
8875d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.
   Resets per-device sequence state, selects the state mutex, and sends the
   initial per-device handshake packets.  Returns 0 or the first send error. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* peers older than protocol 100 share one connection-wide state
	 * mutex; newer peers get a per-device one */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* initial handshake packets; stop at the first failure but still
	 * run the flag/timer resets below */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
915b411b363SPhilipp Reisner 
/*
 * conn_connect() - establish both TCP sockets of a DRBD connection and
 * perform the feature/auth/protocol handshake.
 *
 * Both peers simultaneously dial out and listen; the two sockets (bulk
 * "data" and low-latency "meta") are matched up via P_INITIAL_DATA /
 * P_INITIAL_META first packets, with random retries to break ties when
 * the connection attempts cross.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* sock = bulk data channel, msock = meta/ack channel */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* one outgoing attempt: first success becomes the data
		 * socket, second becomes the meta socket */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* also consider connections the peer initiated towards us */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; drop ours,
					 * keep the peer's, and maybe retry */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* coin flip breaks the tie between the two peers */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* both sockets established; the listen socket is no longer needed */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	/* GFP_NOIO: socket allocations must not recurse into block I/O */
	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* handshake done; switch the data socket to its operational timeouts */
	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* drbd_connected() may sleep, so pin the device with a kref
		 * and drop the RCU read lock around the call */
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1155b411b363SPhilipp Reisner 
1156bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1157b411b363SPhilipp Reisner {
1158bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1159b411b363SPhilipp Reisner 
11600c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11610c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11620c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11630c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11641ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11650c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116602918be2SPhilipp Reisner 		}
11670c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11680c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11690c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11700c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1171e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1172e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1173e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1174b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1175eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1176e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1177e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1178e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1179e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1180e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
118177351055SPhilipp Reisner 		pi->vnr = 0;
118202918be2SPhilipp Reisner 	} else {
11831ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1184e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1185bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11868172f3e9SAndreas Gruenbacher 		return -EINVAL;
1187b411b363SPhilipp Reisner 	}
1188e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11898172f3e9SAndreas Gruenbacher 	return 0;
1190b411b363SPhilipp Reisner }
1191b411b363SPhilipp Reisner 
/* Flush the receiver thread's blk-plug so any bios queued so far are
 * pushed down to the backing devices now, then re-arm the plug so
 * subsequent submissions are batched again.  Only acts when we are
 * actually running under the connection's receiver plug. */
static void drbd_unplug_all_devices(struct drbd_connection *connection)
{
	if (current->plug == &connection->receiver_plug) {
		blk_finish_plug(&connection->receiver_plug);
		blk_start_plug(&connection->receiver_plug);
	} /* else: maybe just schedule() ?? */
}
1199c51a0ef3SLars Ellenberg 
1200bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1201257d0af6SPhilipp Reisner {
1202bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120369bc7bc3SAndreas Gruenbacher 	int err;
1204257d0af6SPhilipp Reisner 
1205bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1206a5c31904SAndreas Gruenbacher 	if (err)
120769bc7bc3SAndreas Gruenbacher 		return err;
1208257d0af6SPhilipp Reisner 
1209bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1210bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1211b411b363SPhilipp Reisner 
121269bc7bc3SAndreas Gruenbacher 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first try a non-blocking read.  If the
 * header is not (fully) available yet, use the idle moment to unplug the
 * backend queues before falling back to a blocking read for the rest. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	/* opportunistic, non-blocking attempt to read a full header */
	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* partial header received: account for it, then block for the rest */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;	/* in-flight flush bios, plus one held by the submitter */
	int error;		/* errno of a failed flush (last failure wins), 0 if all OK */
	struct completion done;	/* completed when pending drops to zero */
};
/* per-bio private data, linking one flush bio back to its device and context */
struct one_flush_context {
	struct drbd_device *device;
	struct issue_flush_context *ctx;
};
1258f9ff0da5SLars Ellenberg 
/* Completion handler for one flush bio submitted by submit_one_flush().
 * Records any error in the shared context, drops the ldev and kref
 * references taken in drbd_flush(), and completes the context when the
 * last pending flush finishes. */
static void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_status) {
		/* concurrent failures overwrite each other; any non-zero
		 * value triggers the fallback in drbd_flush() */
		ctx->error = blk_status_to_errno(bio->bi_status);
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	kref_put(&device->kref, drbd_destroy_device);

	/* last one out signals the waiter in drbd_flush() */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1279f9ff0da5SLars Ellenberg 
/* Submit an asynchronous flush (empty REQ_PREFLUSH bio) to one device's
 * backing bdev, accounted in @ctx.  The caller (drbd_flush) has taken a
 * get_ldev() and a kref on @device; both are released by one_flush_endio(),
 * or here on the allocation-failure path. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
				    REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);

	if (!octx) {
		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		bio_put(bio);

		/* report the failure and drop the references the caller took */
		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	/* balanced by atomic_dec_and_test() in one_flush_endio() */
	atomic_inc(&ctx->pending);
	submit_bio(bio);
}
1309f9ff0da5SLars Ellenberg 
/* Flush the backing devices of all volumes of @connection in parallel and
 * wait for all of them to complete.  No-op unless the resource's write
 * ordering is at least WO_BDEV_FLUSH.  On any flush error, degrade the
 * write ordering to WO_DRAIN_IO so we do not keep issuing flushes. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* start pending at 1: the extra count keeps ctx alive until
		 * we have submitted to every volume (dropped below) */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			/* pin the device, then drop the RCU lock: submission
			 * may sleep (GFP_NOIO allocations) */
			kref_get(&device->kref);
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1350b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished (barrier ack sent, epoch destroyed or recycled) once
 * it is non-empty, has no active requests left, and has received its barrier
 * number -- or when EV_CLEANUP forces it.  Finishing one epoch may cascade
 * to the next one in the list.
 *
 * Return: FE_STILL_LIVE if nothing was finished, FE_DESTROYED or FE_RECYCLED
 * otherwise.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* sending may block; drop the spinlock around it */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* an older epoch: unlink and free it, then
				 * cascade the event to its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is reset in place for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1426b411b363SPhilipp Reisner 
14278fe39aacSPhilipp Reisner static enum write_ordering_e
14288fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14298fe39aacSPhilipp Reisner {
14308fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14318fe39aacSPhilipp Reisner 
14328fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14338fe39aacSPhilipp Reisner 
1434f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1435f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14388fe39aacSPhilipp Reisner 
14398fe39aacSPhilipp Reisner 	return wo;
14408fe39aacSPhilipp Reisner }
14418fe39aacSPhilipp Reisner 
/*
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource whose write ordering is adjusted.
 * @bdev:	Backing device being attached, if any; its limits are applied
 *		even though it is not yet reachable via the devices idr.
 *		May be NULL.
 * @wo:		Write ordering method to try.
 *
 * The resulting method is the minimum allowed over all attached backing
 * devices, except that an explicit WO_BDEV_FLUSH request may upgrade again.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* only WO_BDEV_FLUSH may raise the ordering again; anything else
	 * can only lower it */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already accounted for via this device;
			 * don't apply its limits twice below */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1480b411b363SPhilipp Reisner 
1481f31e583aSLars Ellenberg /*
1482f31e583aSLars Ellenberg  * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1483f31e583aSLars Ellenberg  * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1484f31e583aSLars Ellenberg  * will directly go to fallback mode, submitting normal writes, and
1485f31e583aSLars Ellenberg  * never even try to UNMAP.
1486f31e583aSLars Ellenberg  *
1487f31e583aSLars Ellenberg  * And dm-thin does not do this (yet), mostly because in general it has
1488f31e583aSLars Ellenberg  * to assume that "skip_block_zeroing" is set.  See also:
1489f31e583aSLars Ellenberg  * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1490f31e583aSLars Ellenberg  * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1491f31e583aSLars Ellenberg  *
1492f31e583aSLars Ellenberg  * We *may* ignore the discard-zeroes-data setting, if so configured.
1493f31e583aSLars Ellenberg  *
1494f31e583aSLars Ellenberg  * Assumption is that this "discard_zeroes_data=0" is only because the backend
1495f31e583aSLars Ellenberg  * may ignore partial unaligned discards.
1496f31e583aSLars Ellenberg  *
1497f31e583aSLars Ellenberg  * LVM/DM thin as of at least
1498f31e583aSLars Ellenberg  *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
1499f31e583aSLars Ellenberg  *   Library version: 1.02.93-RHEL7 (2015-01-28)
1500f31e583aSLars Ellenberg  *   Driver version:  4.29.0
1501f31e583aSLars Ellenberg  * still behaves this way.
1502f31e583aSLars Ellenberg  *
1503f31e583aSLars Ellenberg  * For unaligned (wrt. alignment and granularity) or too small discards,
1504f31e583aSLars Ellenberg  * we zero-out the initial (and/or) trailing unaligned partial chunks,
1505f31e583aSLars Ellenberg  * but discard all the aligned full chunks.
1506f31e583aSLars Ellenberg  *
1507f31e583aSLars Ellenberg  * At least for LVM/DM thin, with skip_block_zeroing=false,
1508f31e583aSLars Ellenberg  * the result is effectively "discard_zeroes_data=1".
1509f31e583aSLars Ellenberg  */
/* flags: EE_TRIM|EE_ZEROOUT */
/* Discard (EE_TRIM) or zero out (EE_ZEROOUT, or fallback) the range
 * [@start, @start + @nr_sectors) on @device's backing device, honouring
 * the backend's discard granularity and alignment: unaligned head/tail
 * chunks are zeroed, full aligned chunks are discarded.
 * Returns 0 on success, 1 if any sub-operation failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* explicit zero-out requested, or discard not requested at all */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap each discard at 2 GiB, rounded down to a granularity multiple */
	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		/* zero the unaligned head, then continue aligned at tmp */
		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* whatever could not be discarded (or all of it) gets zeroed;
	 * suppress unmapping only when the peer asked for a plain zero-out */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1575f31e583aSLars Ellenberg 
1576f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1577f31e583aSLars Ellenberg {
1578f31e583aSLars Ellenberg 	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1579f31e583aSLars Ellenberg 	struct disk_conf *dc;
1580f31e583aSLars Ellenberg 	bool can_do;
1581f31e583aSLars Ellenberg 
1582f31e583aSLars Ellenberg 	if (!blk_queue_discard(q))
1583f31e583aSLars Ellenberg 		return false;
1584f31e583aSLars Ellenberg 
1585f31e583aSLars Ellenberg 	rcu_read_lock();
1586f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1587f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1588f31e583aSLars Ellenberg 	rcu_read_unlock();
1589f31e583aSLars Ellenberg 	return can_do;
1590f31e583aSLars Ellenberg }
1591f31e583aSLars Ellenberg 
1592f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1593f31e583aSLars Ellenberg {
1594f31e583aSLars Ellenberg 	/* If the backend cannot discard, or does not guarantee
1595f31e583aSLars Ellenberg 	 * read-back zeroes in discarded ranges, we fall back to
1596f31e583aSLars Ellenberg 	 * zero-out.  Unless configuration specifically requested
1597f31e583aSLars Ellenberg 	 * otherwise. */
1598f31e583aSLars Ellenberg 	if (!can_do_reliable_discards(device))
1599f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
1600f31e583aSLars Ellenberg 
1601f31e583aSLars Ellenberg 	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1602f31e583aSLars Ellenberg 	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1603dd4f699dSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
1604dd4f699dSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
1605dd4f699dSLars Ellenberg }
1606dd4f699dSLars Ellenberg 
16079104d31aSLars Ellenberg static void drbd_issue_peer_wsame(struct drbd_device *device,
16089104d31aSLars Ellenberg 				  struct drbd_peer_request *peer_req)
16099104d31aSLars Ellenberg {
16109104d31aSLars Ellenberg 	struct block_device *bdev = device->ldev->backing_bdev;
16119104d31aSLars Ellenberg 	sector_t s = peer_req->i.sector;
16129104d31aSLars Ellenberg 	sector_t nr = peer_req->i.size >> 9;
16139104d31aSLars Ellenberg 	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
16149104d31aSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
16159104d31aSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
16169104d31aSLars Ellenberg }
16179104d31aSLars Ellenberg 
16189104d31aSLars Ellenberg 
/*
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @op:		REQ_OP_* operation
 * @op_flags:	additional REQ_* flags
 * @fault_type:	fault-injection class for drbd_submit_bio_noacct()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * TRIM / ZEROOUT / WRITE SAME requests carry no page payload and are
 * diverted to synchronous block-layer helpers instead.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned op, const unsigned op_flags,
			     const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* TRIM/DISCARD: for now, always use the helper function
	 * blkdev_issue_zeroout(..., discard=true).
	 * It's synchronous, but it does the right thing wrt. bio splitting.
	 * Correctness first, performance later.  Next step is to code an
	 * asynchronous variant of the same.
	 */
	if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request from receive_rs_deallocated(),
		 * it is already on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
			drbd_issue_peer_discard_or_zero_out(device, peer_req);
		else /* EE_WRITE_SAME */
			drbd_issue_peer_wsame(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	/* we also jump back here when bio_add_page() refuses a page:
	 * the current bio is full, start a new one at the current sector */
	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags,
			GFP_NOIO);
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* keep the bios on a single-linked list until all pages are placed */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0))
			goto next_bio;
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_submit_bio_noacct(device, fault_type, bio);
	} while (bios);
	return 0;
}
172245bb912bSLars Ellenberg 
/* Drop a peer (write) request's interval from the device's write_requests
 * interval tree, used for write-conflict detection, and wake anyone who was
 * waiting for this peer request to leave the tree.  Called with
 * device->resource->req_lock held (see the caller in e_end_block). */
1723b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1724db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
172553840641SAndreas Gruenbacher {
1726db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
172753840641SAndreas Gruenbacher 
1728b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
	/* mark the interval as no longer part of any tree */
172953840641SAndreas Gruenbacher 	drbd_clear_interval(i);
173053840641SAndreas Gruenbacher 
17316c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
173253840641SAndreas Gruenbacher 	if (i->waiting)
1733b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
173453840641SAndreas Gruenbacher }
173553840641SAndreas Gruenbacher 
/* Wait until the active_ee list (writes in flight to the local disk) is
 * empty on every volume of this connection.
 *
 * The peer-device IDR is walked under rcu_read_lock(), but
 * drbd_wait_ee_list_empty() sleeps, so for each device we take a kref to
 * pin it, drop the RCU read lock while waiting, then re-acquire it before
 * continuing the iteration. */
1736bde89a9eSAndreas Gruenbacher static void conn_wait_active_ee_empty(struct drbd_connection *connection)
173777fede51SPhilipp Reisner {
1738c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
173977fede51SPhilipp Reisner 	int vnr;
174077fede51SPhilipp Reisner 
174177fede51SPhilipp Reisner 	rcu_read_lock();
1742c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1743c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
1744c06ece6bSAndreas Gruenbacher 
		/* pin the device: we are about to sleep outside the RCU section */
1745b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
174677fede51SPhilipp Reisner 		rcu_read_unlock();
1747b30ab791SAndreas Gruenbacher 		drbd_wait_ee_list_empty(device, &device->active_ee);
174805a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
174977fede51SPhilipp Reisner 		rcu_read_lock();
175077fede51SPhilipp Reisner 	}
175177fede51SPhilipp Reisner 	rcu_read_unlock();
175277fede51SPhilipp Reisner }
175377fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: close the current write epoch and
 * open a new one.  How much we have to drain/flush before acknowledging
 * depends on the resource's configured write ordering:
 *   WO_NONE       - just recycle or allocate a fresh epoch object,
 *   WO_BDEV_FLUSH /
 *   WO_DRAIN_IO   - wait for all active writes, then issue a disk flush.
 * Returns 0 on success, -EIO on a protocol/state error. */
1754bde89a9eSAndreas Gruenbacher static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
1755b411b363SPhilipp Reisner {
17562451fc3bSPhilipp Reisner 	int rv;
1757e658983aSAndreas Gruenbacher 	struct p_barrier *p = pi->data;
1758b411b363SPhilipp Reisner 	struct drbd_epoch *epoch;
1759b411b363SPhilipp Reisner 
17609ed57dcbSLars Ellenberg 	/* FIXME these are unacked on connection,
17619ed57dcbSLars Ellenberg 	 * not a specific (peer)device.
17629ed57dcbSLars Ellenberg 	 */
1763bde89a9eSAndreas Gruenbacher 	connection->current_epoch->barrier_nr = p->barrier;
1764bde89a9eSAndreas Gruenbacher 	connection->current_epoch->connection = connection;
	/* may finish (and possibly recycle) the current epoch right away */
1765bde89a9eSAndreas Gruenbacher 	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
1766b411b363SPhilipp Reisner 
1767b411b363SPhilipp Reisner 	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1768b411b363SPhilipp Reisner 	 * the activity log, which means it would not be resynced in case the
1769b411b363SPhilipp Reisner 	 * R_PRIMARY crashes now.
1770b411b363SPhilipp Reisner 	 * Therefore we must send the barrier_ack after the barrier request was
1771b411b363SPhilipp Reisner 	 * completed. */
1772e9526580SPhilipp Reisner 	switch (connection->resource->write_ordering) {
1773f6ba8636SAndreas Gruenbacher 	case WO_NONE:
1774b411b363SPhilipp Reisner 		if (rv == FE_RECYCLED)
177582bc0194SAndreas Gruenbacher 			return 0;
1776b411b363SPhilipp Reisner 
1777b411b363SPhilipp Reisner 		/* receiver context, in the writeout path of the other node.
1778b411b363SPhilipp Reisner 		 * avoid potential distributed deadlock */
1779b411b363SPhilipp Reisner 		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
17802451fc3bSPhilipp Reisner 		if (epoch)
17812451fc3bSPhilipp Reisner 			break;
17822451fc3bSPhilipp Reisner 		else
17831ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* allocation failed: degrade to drain/flush behavior below */
1784df561f66SGustavo A. R. Silva 		fallthrough;
17852451fc3bSPhilipp Reisner 
1786f6ba8636SAndreas Gruenbacher 	case WO_BDEV_FLUSH:
1787f6ba8636SAndreas Gruenbacher 	case WO_DRAIN_IO:
1788bde89a9eSAndreas Gruenbacher 		conn_wait_active_ee_empty(connection);
1789bde89a9eSAndreas Gruenbacher 		drbd_flush(connection);
17902451fc3bSPhilipp Reisner 
		/* after draining, only allocate a new epoch if the current one
		 * still has requests accounted to it; otherwise it is reused */
1791bde89a9eSAndreas Gruenbacher 		if (atomic_read(&connection->current_epoch->epoch_size)) {
17922451fc3bSPhilipp Reisner 			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
17932451fc3bSPhilipp Reisner 			if (epoch)
17942451fc3bSPhilipp Reisner 				break;
1795b411b363SPhilipp Reisner 		}
1796b411b363SPhilipp Reisner 
179782bc0194SAndreas Gruenbacher 		return 0;
17982451fc3bSPhilipp Reisner 	default:
1799e9526580SPhilipp Reisner 		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1800e9526580SPhilipp Reisner 			 connection->resource->write_ordering);
180182bc0194SAndreas Gruenbacher 		return -EIO;
1802b411b363SPhilipp Reisner 	}
1803b411b363SPhilipp Reisner 
	/* initialize the freshly allocated epoch */
1804b411b363SPhilipp Reisner 	epoch->flags = 0;
1805b411b363SPhilipp Reisner 	atomic_set(&epoch->epoch_size, 0);
1806b411b363SPhilipp Reisner 	atomic_set(&epoch->active, 0);
1807b411b363SPhilipp Reisner 
1808bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
1809bde89a9eSAndreas Gruenbacher 	if (atomic_read(&connection->current_epoch->epoch_size)) {
1810bde89a9eSAndreas Gruenbacher 		list_add(&epoch->list, &connection->current_epoch->list);
1811bde89a9eSAndreas Gruenbacher 		connection->current_epoch = epoch;
1812bde89a9eSAndreas Gruenbacher 		connection->epochs++;
1813b411b363SPhilipp Reisner 	} else {
1814b411b363SPhilipp Reisner 		/* The current_epoch got recycled while we allocated this one... */
1815b411b363SPhilipp Reisner 		kfree(epoch);
1816b411b363SPhilipp Reisner 	}
1817bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
1818b411b363SPhilipp Reisner 
181982bc0194SAndreas Gruenbacher 	return 0;
1820b411b363SPhilipp Reisner }
1821b411b363SPhilipp Reisner 
18229104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
/* Temporarily override r->i.size with the on-the-wire payload size so that
 * drbd_csum_ee() hashes exactly the bytes that were transferred, then
 * restore the original request size.  Not re-entrant on the same peer_req. */
18233d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18249104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18259104d31aSLars Ellenberg 			      unsigned int payload_size)
18269104d31aSLars Ellenberg {
18279104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18289104d31aSLars Ellenberg 	r->i.size = payload_size;
18299104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18309104d31aSLars Ellenberg 	r->i.size = tmp;
18319104d31aSLars Ellenberg }
18329104d31aSLars Ellenberg 
1833b411b363SPhilipp Reisner /* used from receive_RSDataReply (recv_resync_read)
18349104d31aSLars Ellenberg  * and from receive_Data.
18359104d31aSLars Ellenberg  * data_size: actual payload ("data in")
18369104d31aSLars Ellenberg  * 	for normal writes that is bi_size.
18379104d31aSLars Ellenberg  * 	for discards, that is zero.
18389104d31aSLars Ellenberg  * 	for write same, it is logical_block_size.
18399104d31aSLars Ellenberg  * both trim and write same have the bi_size ("data len to be affected")
18409104d31aSLars Ellenberg  * as extra argument in the packet header.
18419104d31aSLars Ellenberg  */
/* Allocate a drbd_peer_request and read its payload (if any) from the
 * socket.  Verifies the optional peer data-integrity digest and sanity
 * checks sizes/alignment against the local disk.  Returns the new
 * peer_req, or NULL on any validation, allocation or receive error
 * (the connection will be torn down by the caller in that case). */
1842f6ffca9fSAndreas Gruenbacher static struct drbd_peer_request *
184369a22773SAndreas Gruenbacher read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1844a0fb3c47SLars Ellenberg 	      struct packet_info *pi) __must_hold(local)
1845b411b363SPhilipp Reisner {
184669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1847155bd9d1SChristoph Hellwig 	const sector_t capacity = get_capacity(device->vdisk);
1848db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
1849b411b363SPhilipp Reisner 	struct page *page;
185011f8b2b6SAndreas Gruenbacher 	int digest_size, err;
185111f8b2b6SAndreas Gruenbacher 	unsigned int data_size = pi->size, ds;
185269a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
185369a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
18546b4388acSPhilipp Reisner 	unsigned long *data;
1855a0fb3c47SLars Ellenberg 	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1856f31e583aSLars Ellenberg 	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
18579104d31aSLars Ellenberg 	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
1858b411b363SPhilipp Reisner 
	/* if data integrity checking is enabled, the digest precedes the
	 * payload on the wire; receive it first and subtract its size */
185911f8b2b6SAndreas Gruenbacher 	digest_size = 0;
1860a0fb3c47SLars Ellenberg 	if (!trim && peer_device->connection->peer_integrity_tfm) {
18613d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
18629f5bdc33SAndreas Gruenbacher 		/*
18639f5bdc33SAndreas Gruenbacher 		 * FIXME: Receive the incoming digest into the receive buffer
18649f5bdc33SAndreas Gruenbacher 		 *	  here, together with its struct p_data?
18659f5bdc33SAndreas Gruenbacher 		 */
186611f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1867a5c31904SAndreas Gruenbacher 		if (err)
1868b411b363SPhilipp Reisner 			return NULL;
186911f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
187088104ca4SAndreas Gruenbacher 	}
1871b411b363SPhilipp Reisner 
18729104d31aSLars Ellenberg 	/* assume request_size == data_size, but special case trim and wsame. */
18739104d31aSLars Ellenberg 	ds = data_size;
1874a0fb3c47SLars Ellenberg 	if (trim) {
		/* trim/zeroes carry no payload; affected size is in the header */
18759104d31aSLars Ellenberg 		if (!expect(data_size == 0))
18769104d31aSLars Ellenberg 			return NULL;
18779104d31aSLars Ellenberg 		ds = be32_to_cpu(trim->size);
1878f31e583aSLars Ellenberg 	} else if (zeroes) {
1879f31e583aSLars Ellenberg 		if (!expect(data_size == 0))
1880f31e583aSLars Ellenberg 			return NULL;
1881f31e583aSLars Ellenberg 		ds = be32_to_cpu(zeroes->size);
18829104d31aSLars Ellenberg 	} else if (wsame) {
		/* write-same payload must match both DRBD's and the backend's
		 * logical block size */
18839104d31aSLars Ellenberg 		if (data_size != queue_logical_block_size(device->rq_queue)) {
18849104d31aSLars Ellenberg 			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
18859104d31aSLars Ellenberg 				data_size, queue_logical_block_size(device->rq_queue));
18869104d31aSLars Ellenberg 			return NULL;
18879104d31aSLars Ellenberg 		}
18889104d31aSLars Ellenberg 		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
18899104d31aSLars Ellenberg 			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
18909104d31aSLars Ellenberg 				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
18919104d31aSLars Ellenberg 			return NULL;
18929104d31aSLars Ellenberg 		}
18939104d31aSLars Ellenberg 		ds = be32_to_cpu(wsame->size);
1894a0fb3c47SLars Ellenberg 	}
1895a0fb3c47SLars Ellenberg 
	/* request size must be 512-byte aligned and within per-type limits */
18969104d31aSLars Ellenberg 	if (!expect(IS_ALIGNED(ds, 512)))
1897841ce241SAndreas Gruenbacher 		return NULL;
1898f31e583aSLars Ellenberg 	if (trim || wsame || zeroes) {
18999104d31aSLars Ellenberg 		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
19009104d31aSLars Ellenberg 			return NULL;
19019104d31aSLars Ellenberg 	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
1902841ce241SAndreas Gruenbacher 		return NULL;
1903b411b363SPhilipp Reisner 
19046666032aSLars Ellenberg 	/* even though we trust out peer,
19056666032aSLars Ellenberg 	 * we sometimes have to double check. */
19069104d31aSLars Ellenberg 	if (sector + (ds>>9) > capacity) {
1907d0180171SAndreas Gruenbacher 		drbd_err(device, "request from peer beyond end of local disk: "
1908fdda6544SLars Ellenberg 			"capacity: %llus < sector: %llus + size: %u\n",
19096666032aSLars Ellenberg 			(unsigned long long)capacity,
19109104d31aSLars Ellenberg 			(unsigned long long)sector, ds);
19116666032aSLars Ellenberg 		return NULL;
19126666032aSLars Ellenberg 	}
19136666032aSLars Ellenberg 
1914b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1915b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
1916b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
19179104d31aSLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
1918db830c46SAndreas Gruenbacher 	if (!peer_req)
1919b411b363SPhilipp Reisner 		return NULL;
192045bb912bSLars Ellenberg 
192121ae5d7fSLars Ellenberg 	peer_req->flags |= EE_WRITE;
	/* trim/zeroes requests carry no payload: flag them and return early */
19229104d31aSLars Ellenberg 	if (trim) {
1923f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
1924f31e583aSLars Ellenberg 		return peer_req;
1925f31e583aSLars Ellenberg 	}
1926f31e583aSLars Ellenberg 	if (zeroes) {
1927f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
192881a3537aSLars Ellenberg 		return peer_req;
19299104d31aSLars Ellenberg 	}
19309104d31aSLars Ellenberg 	if (wsame)
19319104d31aSLars Ellenberg 		peer_req->flags |= EE_WRITE_SAME;
1932a73ff323SLars Ellenberg 
19339104d31aSLars Ellenberg 	/* receive payload size bytes into page chain */
1934b411b363SPhilipp Reisner 	ds = data_size;
1935db830c46SAndreas Gruenbacher 	page = peer_req->pages;
193645bb912bSLars Ellenberg 	page_chain_for_each(page) {
193745bb912bSLars Ellenberg 		unsigned len = min_t(int, ds, PAGE_SIZE);
19386b4388acSPhilipp Reisner 		data = kmap(page);
193969a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1940b30ab791SAndreas Gruenbacher 		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
1941d0180171SAndreas Gruenbacher 			drbd_err(device, "Fault injection: Corrupting data on receive\n");
19426b4388acSPhilipp Reisner 			data[0] = data[0] ^ (unsigned long)-1;
19436b4388acSPhilipp Reisner 		}
1944b411b363SPhilipp Reisner 		kunmap(page);
1945a5c31904SAndreas Gruenbacher 		if (err) {
1946b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1947b411b363SPhilipp Reisner 			return NULL;
1948b411b363SPhilipp Reisner 		}
1949a5c31904SAndreas Gruenbacher 		ds -= len;
1950b411b363SPhilipp Reisner 	}
1951b411b363SPhilipp Reisner 
	/* verify the received data against the digest sent by the peer */
195211f8b2b6SAndreas Gruenbacher 	if (digest_size) {
19539104d31aSLars Ellenberg 		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
195411f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
1955d0180171SAndreas Gruenbacher 			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
1956470be44aSLars Ellenberg 				(unsigned long long)sector, data_size);
1957b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1958b411b363SPhilipp Reisner 			return NULL;
1959b411b363SPhilipp Reisner 		}
1960b411b363SPhilipp Reisner 	}
1961b30ab791SAndreas Gruenbacher 	device->recv_cnt += data_size >> 9;
1962db830c46SAndreas Gruenbacher 	return peer_req;
1963b411b363SPhilipp Reisner }
1964b411b363SPhilipp Reisner 
1965b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1966b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1967b411b363SPhilipp Reisner  */
/* Returns 0 on success or the negative error from the failed receive.
 * A single scratch page is reused for every chunk; nothing is kept. */
196869a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1969b411b363SPhilipp Reisner {
1970b411b363SPhilipp Reisner 	struct page *page;
1971a5c31904SAndreas Gruenbacher 	int err = 0;
1972b411b363SPhilipp Reisner 	void *data;
1973b411b363SPhilipp Reisner 
1974c3470cdeSLars Ellenberg 	if (!data_size)
1975fc5be839SAndreas Gruenbacher 		return 0;
1976c3470cdeSLars Ellenberg 
	/* NOTE(review): the return value of drbd_alloc_pages() is not checked
	 * before kmap() — presumably it cannot fail in this (retry) mode, but
	 * confirm against its definition. */
197769a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1978b411b363SPhilipp Reisner 
1979b411b363SPhilipp Reisner 	data = kmap(page);
1980b411b363SPhilipp Reisner 	while (data_size) {
1981fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1982fc5be839SAndreas Gruenbacher 
198369a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1984a5c31904SAndreas Gruenbacher 		if (err)
1985b411b363SPhilipp Reisner 			break;
1986a5c31904SAndreas Gruenbacher 		data_size -= len;
1987b411b363SPhilipp Reisner 	}
1988b411b363SPhilipp Reisner 	kunmap(page);
198969a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
1990fc5be839SAndreas Gruenbacher 	return err;
1991b411b363SPhilipp Reisner }
1992b411b363SPhilipp Reisner 
/* Receive the payload of a "diskless read" reply directly into the pages
 * of the original request's master bio (no local disk involved).  Verifies
 * the optional data-integrity digest over the whole bio.  Returns 0 on
 * success, a negative error on receive failure or -EINVAL on digest
 * mismatch. */
199369a22773SAndreas Gruenbacher static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
1994b411b363SPhilipp Reisner 			   sector_t sector, int data_size)
1995b411b363SPhilipp Reisner {
19967988613bSKent Overstreet 	struct bio_vec bvec;
19977988613bSKent Overstreet 	struct bvec_iter iter;
1998b411b363SPhilipp Reisner 	struct bio *bio;
199911f8b2b6SAndreas Gruenbacher 	int digest_size, err, expect;
200069a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
200169a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
2002b411b363SPhilipp Reisner 
	/* the digest, if configured, precedes the payload on the wire */
200311f8b2b6SAndreas Gruenbacher 	digest_size = 0;
200469a22773SAndreas Gruenbacher 	if (peer_device->connection->peer_integrity_tfm) {
20053d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
200611f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
2007a5c31904SAndreas Gruenbacher 		if (err)
2008a5c31904SAndreas Gruenbacher 			return err;
200911f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
201088104ca4SAndreas Gruenbacher 	}
2011b411b363SPhilipp Reisner 
2012b411b363SPhilipp Reisner 	/* optimistically update recv_cnt.  if receiving fails below,
2013b411b363SPhilipp Reisner 	 * we disconnect anyways, and counters will be reset. */
201469a22773SAndreas Gruenbacher 	peer_device->device->recv_cnt += data_size>>9;
2015b411b363SPhilipp Reisner 
2016b411b363SPhilipp Reisner 	bio = req->master_bio;
201769a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
2018b411b363SPhilipp Reisner 
	/* fill each bio segment straight from the socket */
20197988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
20207988613bSKent Overstreet 		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
20217988613bSKent Overstreet 		expect = min_t(int, data_size, bvec.bv_len);
202269a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
20237988613bSKent Overstreet 		kunmap(bvec.bv_page);
2024a5c31904SAndreas Gruenbacher 		if (err)
2025a5c31904SAndreas Gruenbacher 			return err;
2026a5c31904SAndreas Gruenbacher 		data_size -= expect;
2027b411b363SPhilipp Reisner 	}
2028b411b363SPhilipp Reisner 
202911f8b2b6SAndreas Gruenbacher 	if (digest_size) {
203069a22773SAndreas Gruenbacher 		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
203111f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
203269a22773SAndreas Gruenbacher 			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
203328284cefSAndreas Gruenbacher 			return -EINVAL;
2034b411b363SPhilipp Reisner 		}
2035b411b363SPhilipp Reisner 	}
2036b411b363SPhilipp Reisner 
203769a22773SAndreas Gruenbacher 	D_ASSERT(peer_device->device, data_size == 0);
203828284cefSAndreas Gruenbacher 	return 0;
2039b411b363SPhilipp Reisner }
2040b411b363SPhilipp Reisner 
2041a990be46SAndreas Gruenbacher /*
2042668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
2043a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
2044a990be46SAndreas Gruenbacher  */
/* Completion work for a resync write: on success mark the range in sync
 * and ack with P_RS_WRITE_ACK; on I/O error record the failed range and
 * send P_NEG_ACK instead.  Returns the result of the ack send. */
204599920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
2046b411b363SPhilipp Reisner {
20478050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2048a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2049a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2050a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2051db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
205299920dc5SAndreas Gruenbacher 	int err;
2053b411b363SPhilipp Reisner 
	/* resync requests never enter the write-conflict interval tree */
20540b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2055b411b363SPhilipp Reisner 
2056db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2057b30ab791SAndreas Gruenbacher 		drbd_set_in_sync(device, sector, peer_req->i.size);
2058a8cd15baSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
2059b411b363SPhilipp Reisner 	} else {
2060b411b363SPhilipp Reisner 		/* Record failure to sync */
2061b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, peer_req->i.size);
2062b411b363SPhilipp Reisner 
2063a8cd15baSAndreas Gruenbacher 		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2064b411b363SPhilipp Reisner 	}
	/* pairs with the inc_unacked() in recv_resync_read() */
2065b30ab791SAndreas Gruenbacher 	dec_unacked(device);
2066b411b363SPhilipp Reisner 
206799920dc5SAndreas Gruenbacher 	return err;
2068b411b363SPhilipp Reisner }
2069b411b363SPhilipp Reisner 
/* Receive a resync data block from the peer and submit it as a local
 * write.  On success the completion path (e_end_resync_block) sends the
 * ack and drops the unacked count; on failure we undo the bookkeeping,
 * free the peer request, release the ldev reference taken by the caller,
 * and return -EIO so the connection is torn down. */
207069a22773SAndreas Gruenbacher static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
2071a0fb3c47SLars Ellenberg 			    struct packet_info *pi) __releases(local)
2072b411b363SPhilipp Reisner {
207369a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2074db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2075b411b363SPhilipp Reisner 
2076a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
2077db830c46SAndreas Gruenbacher 	if (!peer_req)
207845bb912bSLars Ellenberg 		goto fail;
2079b411b363SPhilipp Reisner 
2080b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
2081b411b363SPhilipp Reisner 
2082b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2083b411b363SPhilipp Reisner 	/* corresponding dec_unacked() in e_end_resync_block()
2084b411b363SPhilipp Reisner 	 * respective _drbd_clear_done_ee */
2085b411b363SPhilipp Reisner 
2086a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_resync_block;
208721ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
208845bb912bSLars Ellenberg 
	/* queue on sync_ee before submitting, so completion can find it */
20890500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2090b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->sync_ee);
20910500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2092b411b363SPhilipp Reisner 
2093a0fb3c47SLars Ellenberg 	atomic_add(pi->size >> 9, &device->rs_sect_ev);
2094bb3cc85eSMike Christie 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
2095bb3cc85eSMike Christie 				     DRBD_FAULT_RS_WR) == 0)
2096e1c1b0fcSAndreas Gruenbacher 		return 0;
209745bb912bSLars Ellenberg 
209810f6d992SLars Ellenberg 	/* don't care for the reason here */
2099d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n")
21000500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2101a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
21020500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
210322cc37a9SLars Ellenberg 
2104b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
210545bb912bSLars Ellenberg fail:
	/* release the get_ldev() reference taken by our caller */
2106b30ab791SAndreas Gruenbacher 	put_ldev(device);
2107e1c1b0fcSAndreas Gruenbacher 	return -EIO;
2108b411b363SPhilipp Reisner }
2109b411b363SPhilipp Reisner 
/* Look up the drbd_request a peer ack/reply refers to.  The peer echoes
 * back the request pointer we sent as block_id; we validate it by checking
 * that the corresponding interval really is in our tree at that sector
 * before trusting it.  Returns NULL (optionally logging, unless
 * missing_ok) when validation fails.
 * NOTE(review): the id is cast to a pointer before validation — the
 * interval-tree containment check is what guards against a bogus id;
 * confirm drbd_contains_interval() dereferences nothing until matched. */
2110668eebc6SAndreas Gruenbacher static struct drbd_request *
2111b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2112bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2113b411b363SPhilipp Reisner {
2114b411b363SPhilipp Reisner 	struct drbd_request *req;
2115668eebc6SAndreas Gruenbacher 
2116bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2117bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
21185e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2119668eebc6SAndreas Gruenbacher 		return req;
2120c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2121d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2122c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2123c3afd8f5SAndreas Gruenbacher 	}
2124668eebc6SAndreas Gruenbacher 	return NULL;
2125668eebc6SAndreas Gruenbacher }
2126668eebc6SAndreas Gruenbacher 
/* Handle a P_DATA_REPLY packet: the answer to a read we sent to the peer
 * while diskless.  Finds the pending local request by its echoed block_id,
 * copies the payload into its bio, and advances the request state machine
 * with DATA_RECEIVED.  Returns 0 on success, -EIO on lookup/receive
 * failure. */
2127bde89a9eSAndreas Gruenbacher static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
2128b411b363SPhilipp Reisner {
21299f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2130b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2131b411b363SPhilipp Reisner 	struct drbd_request *req;
2132b411b363SPhilipp Reisner 	sector_t sector;
213382bc0194SAndreas Gruenbacher 	int err;
2134e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21354a76b161SAndreas Gruenbacher 
21369f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21379f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21384a76b161SAndreas Gruenbacher 		return -EIO;
21399f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2140b411b363SPhilipp Reisner 
2141b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2142b411b363SPhilipp Reisner 
	/* validate the peer-supplied request id under req_lock */
21430500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2144b30ab791SAndreas Gruenbacher 	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
21450500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2146c3afd8f5SAndreas Gruenbacher 	if (unlikely(!req))
214782bc0194SAndreas Gruenbacher 		return -EIO;
2148b411b363SPhilipp Reisner 
214924c4830cSBart Van Assche 	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
2150b411b363SPhilipp Reisner 	 * special casing it there for the various failure cases.
2151b411b363SPhilipp Reisner 	 * still no race with drbd_fail_pending_reads */
215269a22773SAndreas Gruenbacher 	err = recv_dless_read(peer_device, req, sector, pi->size);
215382bc0194SAndreas Gruenbacher 	if (!err)
21548554df1cSAndreas Gruenbacher 		req_mod(req, DATA_RECEIVED);
2155b411b363SPhilipp Reisner 	/* else: nothing. handled from drbd_disconnect...
2156b411b363SPhilipp Reisner 	 * I don't think we may complete this just yet
2157b411b363SPhilipp Reisner 	 * in case we are "on-disconnect: freeze" */
2158b411b363SPhilipp Reisner 
215982bc0194SAndreas Gruenbacher 	return err;
2160b411b363SPhilipp Reisner }
2161b411b363SPhilipp Reisner 
/* Handle a P_RS_DATA_REPLY packet: resync data we requested from the
 * peer.  If the local disk is attached, submit the data via
 * recv_resync_read(); otherwise drain the payload off the socket and send
 * a negative ack.  Either way, account the received sectors to rs_sect_in
 * for resync throttling. */
2162bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
2163b411b363SPhilipp Reisner {
21649f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2165b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2166b411b363SPhilipp Reisner 	sector_t sector;
216782bc0194SAndreas Gruenbacher 	int err;
2168e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21694a76b161SAndreas Gruenbacher 
21709f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21719f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21724a76b161SAndreas Gruenbacher 		return -EIO;
21739f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2174b411b363SPhilipp Reisner 
2175b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
21760b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, p->block_id == ID_SYNCER);
2177b411b363SPhilipp Reisner 
2178b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
2179b411b363SPhilipp Reisner 		/* data is submitted to disk within recv_resync_read.
2180b411b363SPhilipp Reisner 		 * corresponding put_ldev done below on error,
2181fcefa62eSAndreas Gruenbacher 		 * or in drbd_peer_request_endio. */
2182a0fb3c47SLars Ellenberg 		err = recv_resync_read(peer_device, sector, pi);
2183b411b363SPhilipp Reisner 	} else {
2184b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
2185d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not write resync data to local disk.\n");
2186b411b363SPhilipp Reisner 
		/* no disk: consume the payload so the stream stays in sync */
218769a22773SAndreas Gruenbacher 		err = drbd_drain_block(peer_device, pi->size);
2188b411b363SPhilipp Reisner 
218969a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2190b411b363SPhilipp Reisner 	}
2191b411b363SPhilipp Reisner 
2192b30ab791SAndreas Gruenbacher 	atomic_add(pi->size >> 9, &device->rs_sect_in);
2193778f271dSPhilipp Reisner 
219482bc0194SAndreas Gruenbacher 	return err;
2195b411b363SPhilipp Reisner }
2196b411b363SPhilipp Reisner 
/* Re-queue local writes that were postponed because they conflicted with
 * a peer write over [sector, sector+size).  Walks the overlapping
 * intervals in the write_requests tree; requests that are RQ_POSTPONED
 * and no longer locally pending are moved to the retry workqueue via
 * CONFLICT_RESOLVED.  Called with req_lock held (see e_end_block). */
2197b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device,
21987be8da07SAndreas Gruenbacher 				       sector_t sector, int size)
2199b411b363SPhilipp Reisner {
22007be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
22017be8da07SAndreas Gruenbacher 	struct drbd_request *req;
2202b411b363SPhilipp Reisner 
2203b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* skip peer requests; only local requests can be restarted */
22047be8da07SAndreas Gruenbacher 		if (!i->local)
22057be8da07SAndreas Gruenbacher 			continue;
22067be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
22077be8da07SAndreas Gruenbacher 		if (req->rq_state & RQ_LOCAL_PENDING ||
22087be8da07SAndreas Gruenbacher 		    !(req->rq_state & RQ_POSTPONED))
22097be8da07SAndreas Gruenbacher 			continue;
22102312f0b3SLars Ellenberg 		/* as it is RQ_POSTPONED, this will cause it to
22112312f0b3SLars Ellenberg 		 * be queued on the retry workqueue. */
2212d4dabbe2SLars Ellenberg 		__req_mod(req, CONFLICT_RESOLVED, NULL);
22137be8da07SAndreas Gruenbacher 	}
22147be8da07SAndreas Gruenbacher }
22157be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion work for a mirrored write (peer request): send the
 * appropriate ack packet back to the peer, remove the request from the
 * conflict-detection interval tree, and drop the epoch reference.
 * Returns 0 on success, or the error from drbd_send_ack().
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync (C_SYNC_SOURCE .. C_PAUSED_SYNC_T) a
			 * successful write that may set bits in sync is acked
			 * with P_RS_WRITE_ACK instead of plain P_WRITE_ACK. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		/* corresponding inc_unacked() in receive_Data() */
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	/* drop this request's reference on its epoch; EV_CLEANUP when the
	 * work was cancelled */
	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2261b411b363SPhilipp Reisner 
/* Send the given ack packet for a completed peer request and drop the
 * corresponding unacked reference.  Returns the drbd_send_ack() result. */
static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}
2274b411b363SPhilipp Reisner 
/* Tell the peer its conflicting write was superseded by a local write
 * (two-primaries conflict resolution). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2279b6a370baSPhilipp Reisner 
228099920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22817be8da07SAndreas Gruenbacher {
2282a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2283a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2284a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
22857be8da07SAndreas Gruenbacher 
2286a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2287d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
22887be8da07SAndreas Gruenbacher }
22897be8da07SAndreas Gruenbacher 
/*
 * Is sequence number "a" newer than "b", accounting for wrap-around?
 *
 * We assume 32-bit wrap-around here.
 * For 24-bit wrap-around, we would have to shift:
 *  a <<= 8; b <<= 8;
 *
 * The difference is computed in unsigned arithmetic (well-defined
 * modulo-2^32 wrap) and only the result is reinterpreted as signed,
 * avoiding the signed-overflow undefined behavior of the naive
 * (s32)a - (s32)b form; same idiom as the kernel's time_after().
 */
static bool seq_greater(u32 a, u32 b)
{
	return (s32)(a - b) > 0;
}
22993e394da1SAndreas Gruenbacher 
/* Return whichever of two sequence numbers is newer, wrap-around aware. */
static u32 seq_max(u32 a, u32 b)
{
	if (seq_greater(a, b))
		return a;
	return b;
}
23043e394da1SAndreas Gruenbacher 
/* Record the newest sequence number received from the peer and wake any
 * waiters in wait_for_and_update_peer_seq().  Sequence numbers are only
 * tracked when RESOLVE_CONFLICTS is set on the connection. */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
23203e394da1SAndreas Gruenbacher 
/* Do two ranges overlap?  Each range starts at sector s and spans l bytes;
 * byte lengths are converted to sectors with l >> 9 (512-byte sectors). */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	sector_t end1 = s1 + (l1 >> 9);	/* first sector past range 1 */
	sector_t end2 = s2 + (l2 >> 9);	/* first sector past range 2 */

	return s1 < end2 && s2 < end1;
}
2325d93f6302SLars Ellenberg 
/* maybe change sync_ee into interval trees as well? */
/* Does peer_req overlap any resync write currently pending on sync_ee?
 * Linear scan of the sync_ee list under req_lock; returns true on the
 * first overlap found. */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}
2344b6a370baSPhilipp Reisner 
2345b411b363SPhilipp Reisner /* Called from receive_Data.
2346b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2347b411b363SPhilipp Reisner  *
2348b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2349b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2350b411b363SPhilipp Reisner  * been sent.
2351b411b363SPhilipp Reisner  *
2352b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2353b411b363SPhilipp Reisner  *
2354b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2355b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2356b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2357b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2358b411b363SPhilipp Reisner  *
2359b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2360b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2361b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2362b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2363b411b363SPhilipp Reisner  *
2364b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2365b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence numbers are only tracked when conflicts must be resolved. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* peer_seq is the next expected (or an already seen) number:
		 * record it and proceed with the packet. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* sleep until update_peer_seq() wakes us or the timeout hits */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2413b411b363SPhilipp Reisner 
2414688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2415688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2416688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2417bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf)
241876d2e7ecSPhilipp Reisner {
241976d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
242076d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
242128a8f0d3SMike Christie 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2422bb3cc85eSMike Christie }
2423bb3cc85eSMike Christie 
2424bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf)
2425bb3cc85eSMike Christie {
2426f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
242745c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2428f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2429f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2430f31e583aSLars Ellenberg 	if (dpf & DP_WSAME)
2431f31e583aSLars Ellenberg 		return REQ_OP_WRITE_SAME;
2432bb3cc85eSMike Christie 	else
2433bb3cc85eSMike Christie 		return REQ_OP_WRITE;
243476d2e7ecSPhilipp Reisner }
243576d2e7ecSPhilipp Reisner 
/* Fail (NEG_ACK) all postponed local requests overlapping the interval
 * [sector, sector + size).
 * Called with req_lock held; the lock is dropped while completing master
 * bios, so the overlap scan restarts from scratch after each completion. */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;	/* only local requests can be failed here */
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		/* the tree may have changed while the lock was dropped */
		goto repeat;
	}
}
24607be8da07SAndreas Gruenbacher 
/* Resolve conflicts between an incoming peer write request and overlapping
 * requests in the write_requests interval tree.
 * Called with req_lock held; drbd_wait_misc() may drop and reacquire it.
 *
 * Returns:
 *  0       - no remaining conflict; the caller may submit peer_req
 *  -ENOENT - peer_req was superseded (or will be retried by the peer); a
 *            reply has been queued on done_ee and the caller must not
 *            submit the request
 *  other   - error from drbd_wait_misc(); peer_req's interval has been
 *            removed from the tree again
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;	/* the interval we just inserted */
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* queue the appropriate reply for the ack sender */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25687be8da07SAndreas Gruenbacher 
/* mirrored write */
/* Receive a P_DATA / P_TRIM / P_ZEROES packet: read the payload into a
 * peer request, resolve write conflicts if two primaries are active, and
 * submit the write to the local backing device.
 * Returns 0 on success, a negative error on protocol or submit failure. */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int op, op_flags;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		/* No local disk: still keep the sequence numbers and the
		 * epoch accounting consistent, negatively ack the write,
		 * and drain its payload from the socket. */
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	op = wire_flags_to_bio_op(dp_flags);
	op_flags = wire_flags_to_bio_flags(dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* a data packet without payload must be a barrier/flush */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the current write epoch */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		/* older peers do not set the ack-mode flags themselves;
		 * derive them from the configured wire protocol */
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				/* superseded/retried: already answered, done */
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and WRITE_SAME are processed synchronously,
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* as sync target, do not race with overlapping resync writes */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
				       DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2739b411b363SPhilipp Reisner 
27400f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27410f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27420f0601f4SLars Ellenberg  *
27430f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27440f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27450f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27460f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27470f0601f4SLars Ellenberg  *
27480f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27490f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27500f0601f4SLars Ellenberg  */
2751ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2752ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2753e8299874SLars Ellenberg {
2754e8299874SLars Ellenberg 	struct lc_element *tmp;
2755ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2756e8299874SLars Ellenberg 
2757ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2758ad3fee79SLars Ellenberg 		return throttle;
2759e8299874SLars Ellenberg 
2760e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2761e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2762e8299874SLars Ellenberg 	if (tmp) {
2763e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2764e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2765e8299874SLars Ellenberg 			throttle = false;
2766ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2767ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2768e8299874SLars Ellenberg 	}
2769e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2770e8299874SLars Ellenberg 
2771e8299874SLars Ellenberg 	return throttle;
2772e8299874SLars Ellenberg }
2773e8299874SLars Ellenberg 
2774e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27750f0601f4SLars Ellenberg {
27768c40c7c4SChristoph Hellwig 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27770f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2778daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2779e8299874SLars Ellenberg 	int curr_events;
2780daeda1ccSPhilipp Reisner 
2781daeda1ccSPhilipp Reisner 	rcu_read_lock();
2782b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2783daeda1ccSPhilipp Reisner 	rcu_read_unlock();
27840f0601f4SLars Ellenberg 
27850f0601f4SLars Ellenberg 	/* feature disabled? */
2786daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2787e8299874SLars Ellenberg 		return false;
2788e3555d85SPhilipp Reisner 
27898446fe92SChristoph Hellwig 	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2790b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2791ad3fee79SLars Ellenberg 
2792ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2793ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
27940f0601f4SLars Ellenberg 		unsigned long rs_left;
27950f0601f4SLars Ellenberg 		int i;
27960f0601f4SLars Ellenberg 
2797b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
27980f0601f4SLars Ellenberg 
27990f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
28000f0601f4SLars Ellenberg 		 * approx. */
2801b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
28022649f080SLars Ellenberg 
2803b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2804b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
28052649f080SLars Ellenberg 		else
2806b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
28070f0601f4SLars Ellenberg 
2808b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
28090f0601f4SLars Ellenberg 		if (!dt)
28100f0601f4SLars Ellenberg 			dt++;
2811b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
28120f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
28130f0601f4SLars Ellenberg 
2814daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2815e8299874SLars Ellenberg 			return true;
28160f0601f4SLars Ellenberg 	}
2817e8299874SLars Ellenberg 	return false;
28180f0601f4SLars Ellenberg }
28190f0601f4SLars Ellenberg 
2820bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
2821b411b363SPhilipp Reisner {
28229f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2823b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2824b411b363SPhilipp Reisner 	sector_t sector;
28254a76b161SAndreas Gruenbacher 	sector_t capacity;
2826db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2827b411b363SPhilipp Reisner 	struct digest_info *di = NULL;
2828b18b37beSPhilipp Reisner 	int size, verb;
2829b411b363SPhilipp Reisner 	unsigned int fault_type;
2830e658983aSAndreas Gruenbacher 	struct p_block_req *p =	pi->data;
28314a76b161SAndreas Gruenbacher 
28329f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
28339f4fe9adSAndreas Gruenbacher 	if (!peer_device)
28344a76b161SAndreas Gruenbacher 		return -EIO;
28359f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2836155bd9d1SChristoph Hellwig 	capacity = get_capacity(device->vdisk);
2837b411b363SPhilipp Reisner 
2838b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2839b411b363SPhilipp Reisner 	size   = be32_to_cpu(p->blksize);
2840b411b363SPhilipp Reisner 
2841c670a398SAndreas Gruenbacher 	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2842d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2843b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
284482bc0194SAndreas Gruenbacher 		return -EINVAL;
2845b411b363SPhilipp Reisner 	}
2846b411b363SPhilipp Reisner 	if (sector + (size>>9) > capacity) {
2847d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2848b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
284982bc0194SAndreas Gruenbacher 		return -EINVAL;
2850b411b363SPhilipp Reisner 	}
2851b411b363SPhilipp Reisner 
2852b30ab791SAndreas Gruenbacher 	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
2853b18b37beSPhilipp Reisner 		verb = 1;
2854e2857216SAndreas Gruenbacher 		switch (pi->cmd) {
2855b18b37beSPhilipp Reisner 		case P_DATA_REQUEST:
285669a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2857b18b37beSPhilipp Reisner 			break;
2858700ca8c0SPhilipp Reisner 		case P_RS_THIN_REQ:
2859b18b37beSPhilipp Reisner 		case P_RS_DATA_REQUEST:
2860b18b37beSPhilipp Reisner 		case P_CSUM_RS_REQUEST:
2861b18b37beSPhilipp Reisner 		case P_OV_REQUEST:
286269a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
2863b18b37beSPhilipp Reisner 			break;
2864b18b37beSPhilipp Reisner 		case P_OV_REPLY:
2865b18b37beSPhilipp Reisner 			verb = 0;
2866b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
286769a22773SAndreas Gruenbacher 			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2868b18b37beSPhilipp Reisner 			break;
2869b18b37beSPhilipp Reisner 		default:
287049ba9b1bSAndreas Gruenbacher 			BUG();
2871b18b37beSPhilipp Reisner 		}
2872b18b37beSPhilipp Reisner 		if (verb && __ratelimit(&drbd_ratelimit_state))
2873d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not satisfy peer's read request, "
2874b411b363SPhilipp Reisner 			    "no local data.\n");
2875b18b37beSPhilipp Reisner 
2876a821cc4aSLars Ellenberg 		/* drain possibly payload */
287769a22773SAndreas Gruenbacher 		return drbd_drain_block(peer_device, pi->size);
2878b411b363SPhilipp Reisner 	}
2879b411b363SPhilipp Reisner 
2880b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2881b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
2882b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
2883a0fb3c47SLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
28849104d31aSLars Ellenberg 			size, GFP_NOIO);
2885db830c46SAndreas Gruenbacher 	if (!peer_req) {
2886b30ab791SAndreas Gruenbacher 		put_ldev(device);
288782bc0194SAndreas Gruenbacher 		return -ENOMEM;
2888b411b363SPhilipp Reisner 	}
2889b411b363SPhilipp Reisner 
2890e2857216SAndreas Gruenbacher 	switch (pi->cmd) {
2891b411b363SPhilipp Reisner 	case P_DATA_REQUEST:
2892a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_data_req;
2893b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_DT_RD;
289480a40e43SLars Ellenberg 		/* application IO, don't drbd_rs_begin_io */
289521ae5d7fSLars Ellenberg 		peer_req->flags |= EE_APPLICATION;
289680a40e43SLars Ellenberg 		goto submit;
289780a40e43SLars Ellenberg 
2898700ca8c0SPhilipp Reisner 	case P_RS_THIN_REQ:
2899700ca8c0SPhilipp Reisner 		/* If at some point in the future we have a smart way to
2900700ca8c0SPhilipp Reisner 		   find out if this data block is completely deallocated,
2901700ca8c0SPhilipp Reisner 		   then we would do something smarter here than reading
2902700ca8c0SPhilipp Reisner 		   the block... */
2903700ca8c0SPhilipp Reisner 		peer_req->flags |= EE_RS_THIN_REQ;
2904df561f66SGustavo A. R. Silva 		fallthrough;
2905b411b363SPhilipp Reisner 	case P_RS_DATA_REQUEST:
2906a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_rsdata_req;
2907b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
29085f9915bbSLars Ellenberg 		/* used in the sector offset progress display */
2909b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2910b411b363SPhilipp Reisner 		break;
2911b411b363SPhilipp Reisner 
2912b411b363SPhilipp Reisner 	case P_OV_REPLY:
2913b411b363SPhilipp Reisner 	case P_CSUM_RS_REQUEST:
2914b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2915e2857216SAndreas Gruenbacher 		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2916b411b363SPhilipp Reisner 		if (!di)
2917b411b363SPhilipp Reisner 			goto out_free_e;
2918b411b363SPhilipp Reisner 
2919e2857216SAndreas Gruenbacher 		di->digest_size = pi->size;
2920b411b363SPhilipp Reisner 		di->digest = (((char *)di)+sizeof(struct digest_info));
2921b411b363SPhilipp Reisner 
2922db830c46SAndreas Gruenbacher 		peer_req->digest = di;
2923db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_HAS_DIGEST;
2924c36c3cedSLars Ellenberg 
29259f4fe9adSAndreas Gruenbacher 		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2926b411b363SPhilipp Reisner 			goto out_free_e;
2927b411b363SPhilipp Reisner 
2928e2857216SAndreas Gruenbacher 		if (pi->cmd == P_CSUM_RS_REQUEST) {
29299f4fe9adSAndreas Gruenbacher 			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2930a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_csum_rs_req;
29315f9915bbSLars Ellenberg 			/* used in the sector offset progress display */
2932b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2933aaaba345SLars Ellenberg 			/* remember to report stats in drbd_resync_finished */
2934aaaba345SLars Ellenberg 			device->use_csums = true;
2935e2857216SAndreas Gruenbacher 		} else if (pi->cmd == P_OV_REPLY) {
29362649f080SLars Ellenberg 			/* track progress, we may need to throttle */
2937b30ab791SAndreas Gruenbacher 			atomic_add(size >> 9, &device->rs_sect_in);
2938a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_ov_reply;
2939b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
29400f0601f4SLars Ellenberg 			/* drbd_rs_begin_io done when we sent this request,
29410f0601f4SLars Ellenberg 			 * but accounting still needs to be done. */
29420f0601f4SLars Ellenberg 			goto submit_for_resync;
2943b411b363SPhilipp Reisner 		}
2944b411b363SPhilipp Reisner 		break;
2945b411b363SPhilipp Reisner 
2946b411b363SPhilipp Reisner 	case P_OV_REQUEST:
2947b30ab791SAndreas Gruenbacher 		if (device->ov_start_sector == ~(sector_t)0 &&
29489f4fe9adSAndreas Gruenbacher 		    peer_device->connection->agreed_pro_version >= 90) {
2949de228bbaSLars Ellenberg 			unsigned long now = jiffies;
2950de228bbaSLars Ellenberg 			int i;
2951b30ab791SAndreas Gruenbacher 			device->ov_start_sector = sector;
2952b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
2953b30ab791SAndreas Gruenbacher 			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2954b30ab791SAndreas Gruenbacher 			device->rs_total = device->ov_left;
2955de228bbaSLars Ellenberg 			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2956b30ab791SAndreas Gruenbacher 				device->rs_mark_left[i] = device->ov_left;
2957b30ab791SAndreas Gruenbacher 				device->rs_mark_time[i] = now;
2958de228bbaSLars Ellenberg 			}
2959d0180171SAndreas Gruenbacher 			drbd_info(device, "Online Verify start sector: %llu\n",
2960b411b363SPhilipp Reisner 					(unsigned long long)sector);
2961b411b363SPhilipp Reisner 		}
2962a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_ov_req;
2963b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2964b411b363SPhilipp Reisner 		break;
2965b411b363SPhilipp Reisner 
2966b411b363SPhilipp Reisner 	default:
296749ba9b1bSAndreas Gruenbacher 		BUG();
2968b411b363SPhilipp Reisner 	}
2969b411b363SPhilipp Reisner 
29700f0601f4SLars Ellenberg 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
29710f0601f4SLars Ellenberg 	 * wrt the receiver, but it is not as straightforward as it may seem.
29720f0601f4SLars Ellenberg 	 * Various places in the resync start and stop logic assume resync
29730f0601f4SLars Ellenberg 	 * requests are processed in order, requeuing this on the worker thread
29740f0601f4SLars Ellenberg 	 * introduces a bunch of new code for synchronization between threads.
29750f0601f4SLars Ellenberg 	 *
29760f0601f4SLars Ellenberg 	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
29770f0601f4SLars Ellenberg 	 * "forever", throttling after drbd_rs_begin_io will lock that extent
29780f0601f4SLars Ellenberg 	 * for application writes for the same time.  For now, just throttle
29790f0601f4SLars Ellenberg 	 * here, where the rest of the code expects the receiver to sleep for
29800f0601f4SLars Ellenberg 	 * a while, anyways.
29810f0601f4SLars Ellenberg 	 */
2982b411b363SPhilipp Reisner 
29830f0601f4SLars Ellenberg 	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
29840f0601f4SLars Ellenberg 	 * this defers syncer requests for some time, before letting at least
29850f0601f4SLars Ellenberg 	 * on request through.  The resync controller on the receiving side
29860f0601f4SLars Ellenberg 	 * will adapt to the incoming rate accordingly.
29870f0601f4SLars Ellenberg 	 *
29880f0601f4SLars Ellenberg 	 * We cannot throttle here if remote is Primary/SyncTarget:
29890f0601f4SLars Ellenberg 	 * we would also throttle its application reads.
29900f0601f4SLars Ellenberg 	 * In that case, throttling is done on the SyncTarget only.
29910f0601f4SLars Ellenberg 	 */
2992c5a2c150SLars Ellenberg 
2993c5a2c150SLars Ellenberg 	/* Even though this may be a resync request, we do add to "read_ee";
2994c5a2c150SLars Ellenberg 	 * "sync_ee" is only used for resync WRITEs.
2995c5a2c150SLars Ellenberg 	 * Add to list early, so debugfs can find this request
2996c5a2c150SLars Ellenberg 	 * even if we have to sleep below. */
2997c5a2c150SLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
2998c5a2c150SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
2999c5a2c150SLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
3000c5a2c150SLars Ellenberg 
3001944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
3002ad3fee79SLars Ellenberg 	if (device->state.peer != R_PRIMARY
3003ad3fee79SLars Ellenberg 	&& drbd_rs_should_slow_down(device, sector, false))
3004e3555d85SPhilipp Reisner 		schedule_timeout_uninterruptible(HZ/10);
3005944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_begin_io);
3006b30ab791SAndreas Gruenbacher 	if (drbd_rs_begin_io(device, sector))
300780a40e43SLars Ellenberg 		goto out_free_e;
3008b411b363SPhilipp Reisner 
30090f0601f4SLars Ellenberg submit_for_resync:
3010b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
30110f0601f4SLars Ellenberg 
301280a40e43SLars Ellenberg submit:
3013944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_submit_peer_request);
3014b30ab791SAndreas Gruenbacher 	inc_unacked(device);
3015bb3cc85eSMike Christie 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
3016bb3cc85eSMike Christie 				     fault_type) == 0)
301782bc0194SAndreas Gruenbacher 		return 0;
3018b411b363SPhilipp Reisner 
301910f6d992SLars Ellenberg 	/* don't care for the reason here */
3020d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
3021c5a2c150SLars Ellenberg 
3022c5a2c150SLars Ellenberg out_free_e:
30230500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
3024a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
30250500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
302622cc37a9SLars Ellenberg 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
302722cc37a9SLars Ellenberg 
3028b30ab791SAndreas Gruenbacher 	put_ldev(device);
3029b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
303082bc0194SAndreas Gruenbacher 	return -EIO;
3031b411b363SPhilipp Reisner }
3032b411b363SPhilipp Reisner 
30339b48ff07SLee Jones /*
303469a22773SAndreas Gruenbacher  * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
303569a22773SAndreas Gruenbacher  */
303669a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
3037b411b363SPhilipp Reisner {
303869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3039b411b363SPhilipp Reisner 	int self, peer, rv = -100;
3040b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
304144ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
3042b411b363SPhilipp Reisner 
3043b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & 1;
3044b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & 1;
3045b411b363SPhilipp Reisner 
3046b30ab791SAndreas Gruenbacher 	ch_peer = device->p_uuid[UI_SIZE];
3047b30ab791SAndreas Gruenbacher 	ch_self = device->comm_bm_set;
3048b411b363SPhilipp Reisner 
304944ed167dSPhilipp Reisner 	rcu_read_lock();
305069a22773SAndreas Gruenbacher 	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
305144ed167dSPhilipp Reisner 	rcu_read_unlock();
305244ed167dSPhilipp Reisner 	switch (after_sb_0p) {
3053b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3054b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3055b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
305644ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
3057d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3058b411b363SPhilipp Reisner 		break;
3059b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3060b411b363SPhilipp Reisner 		break;
3061b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3062b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3063b411b363SPhilipp Reisner 			rv = -1;
3064b411b363SPhilipp Reisner 			break;
3065b411b363SPhilipp Reisner 		}
3066b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3067b411b363SPhilipp Reisner 			rv =  1;
3068b411b363SPhilipp Reisner 			break;
3069b411b363SPhilipp Reisner 		}
3070df561f66SGustavo A. R. Silva 		fallthrough;	/* to one of the other strategies */
3071b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3072b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
3073b411b363SPhilipp Reisner 			rv = 1;
3074b411b363SPhilipp Reisner 			break;
3075b411b363SPhilipp Reisner 		}
3076b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
3077b411b363SPhilipp Reisner 			rv = -1;
3078b411b363SPhilipp Reisner 			break;
3079b411b363SPhilipp Reisner 		}
3080b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
3081d0180171SAndreas Gruenbacher 		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
3082b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
3083df561f66SGustavo A. R. Silva 		fallthrough;
3084b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3085b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
308669a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3087b411b363SPhilipp Reisner 				? -1 : 1;
3088b411b363SPhilipp Reisner 			break;
3089b411b363SPhilipp Reisner 		} else {
3090b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
3091b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
3092b411b363SPhilipp Reisner 		}
309344ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
3094b411b363SPhilipp Reisner 			break;
3095df561f66SGustavo A. R. Silva 		fallthrough;
3096b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3097b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
3098b411b363SPhilipp Reisner 			rv = -1;
3099b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
3100b411b363SPhilipp Reisner 			rv =  1;
3101b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
3102b411b363SPhilipp Reisner 		     /* Well, then use something else. */
310369a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3104b411b363SPhilipp Reisner 				? -1 : 1;
3105b411b363SPhilipp Reisner 		break;
3106b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3107b411b363SPhilipp Reisner 		rv = -1;
3108b411b363SPhilipp Reisner 		break;
3109b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3110b411b363SPhilipp Reisner 		rv =  1;
3111b411b363SPhilipp Reisner 	}
3112b411b363SPhilipp Reisner 
3113b411b363SPhilipp Reisner 	return rv;
3114b411b363SPhilipp Reisner }
3115b411b363SPhilipp Reisner 
31169b48ff07SLee Jones /*
311769a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
311869a22773SAndreas Gruenbacher  */
311969a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3120b411b363SPhilipp Reisner {
312169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31226184ea21SAndreas Gruenbacher 	int hg, rv = -100;
312344ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3124b411b363SPhilipp Reisner 
312544ed167dSPhilipp Reisner 	rcu_read_lock();
312669a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
312744ed167dSPhilipp Reisner 	rcu_read_unlock();
312844ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3129b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3130b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3131b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3132b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3133b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
313444ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3135d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3136b411b363SPhilipp Reisner 		break;
3137b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3138b411b363SPhilipp Reisner 		break;
3139b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
314069a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3141b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3142b411b363SPhilipp Reisner 			rv = hg;
3143b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3144b411b363SPhilipp Reisner 			rv = hg;
3145b411b363SPhilipp Reisner 		break;
3146b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
314769a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3148b411b363SPhilipp Reisner 		break;
3149b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3150b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3151b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
315269a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3153b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3154bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3155bb437946SAndreas Gruenbacher 
3156b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3157b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3158b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3159b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3160bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3161b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3162b411b363SPhilipp Reisner 			} else {
3163d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3164b411b363SPhilipp Reisner 				rv = hg;
3165b411b363SPhilipp Reisner 			}
3166b411b363SPhilipp Reisner 		} else
3167b411b363SPhilipp Reisner 			rv = hg;
3168b411b363SPhilipp Reisner 	}
3169b411b363SPhilipp Reisner 
3170b411b363SPhilipp Reisner 	return rv;
3171b411b363SPhilipp Reisner }
3172b411b363SPhilipp Reisner 
31739b48ff07SLee Jones /*
317469a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
317569a22773SAndreas Gruenbacher  */
317669a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3177b411b363SPhilipp Reisner {
317869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31796184ea21SAndreas Gruenbacher 	int hg, rv = -100;
318044ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3181b411b363SPhilipp Reisner 
318244ed167dSPhilipp Reisner 	rcu_read_lock();
318369a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
318444ed167dSPhilipp Reisner 	rcu_read_unlock();
318544ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3186b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3187b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3188b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3189b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3190b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3191b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3192b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
319344ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3194d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3195b411b363SPhilipp Reisner 		break;
3196b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
319769a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3198b411b363SPhilipp Reisner 		break;
3199b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3200b411b363SPhilipp Reisner 		break;
3201b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
320269a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3203b411b363SPhilipp Reisner 		if (hg == -1) {
3204bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3205bb437946SAndreas Gruenbacher 
3206b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3207b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3208b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3209b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3210bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3211b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3212b411b363SPhilipp Reisner 			} else {
3213d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3214b411b363SPhilipp Reisner 				rv = hg;
3215b411b363SPhilipp Reisner 			}
3216b411b363SPhilipp Reisner 		} else
3217b411b363SPhilipp Reisner 			rv = hg;
3218b411b363SPhilipp Reisner 	}
3219b411b363SPhilipp Reisner 
3220b411b363SPhilipp Reisner 	return rv;
3221b411b363SPhilipp Reisner }
3222b411b363SPhilipp Reisner 
3223b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3224b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3225b411b363SPhilipp Reisner {
3226b411b363SPhilipp Reisner 	if (!uuid) {
3227d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3228b411b363SPhilipp Reisner 		return;
3229b411b363SPhilipp Reisner 	}
3230d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3231b411b363SPhilipp Reisner 	     text,
3232b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3233b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3234b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3235b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3236b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3237b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3238b411b363SPhilipp Reisner }
3239b411b363SPhilipp Reisner 
3240b411b363SPhilipp Reisner /*
3241b411b363SPhilipp Reisner   100	after split brain try auto recover
3242b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3243b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3244b411b363SPhilipp Reisner     0	no Sync
3245b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3246b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3247b411b363SPhilipp Reisner  -100	after split brain, disconnect
3248b411b363SPhilipp Reisner -1000	unrelated data
32494a23f264SPhilipp Reisner -1091   requires proto 91
32504a23f264SPhilipp Reisner -1096   requires proto 96
3251b411b363SPhilipp Reisner  */
3252f2d3d75bSLars Ellenberg 
3253f2d3d75bSLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3254b411b363SPhilipp Reisner {
325544a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
325644a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
3257b411b363SPhilipp Reisner 	u64 self, peer;
3258b411b363SPhilipp Reisner 	int i, j;
3259b411b363SPhilipp Reisner 
3260b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3261b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3262b411b363SPhilipp Reisner 
3263b411b363SPhilipp Reisner 	*rule_nr = 10;
3264b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3265b411b363SPhilipp Reisner 		return 0;
3266b411b363SPhilipp Reisner 
3267b411b363SPhilipp Reisner 	*rule_nr = 20;
3268b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3269b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
3270b411b363SPhilipp Reisner 		return -2;
3271b411b363SPhilipp Reisner 
3272b411b363SPhilipp Reisner 	*rule_nr = 30;
3273b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
3274b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
3275b411b363SPhilipp Reisner 		return 2;
3276b411b363SPhilipp Reisner 
3277b411b363SPhilipp Reisner 	if (self == peer) {
3278b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
3279b411b363SPhilipp Reisner 
3280b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3281b411b363SPhilipp Reisner 
328244a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32834a23f264SPhilipp Reisner 				return -1091;
3284b411b363SPhilipp Reisner 
3285b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3286b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3287d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3288b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3289b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3290b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3291b411b363SPhilipp Reisner 
3292b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3293b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3294b411b363SPhilipp Reisner 				*rule_nr = 34;
3295b411b363SPhilipp Reisner 			} else {
3296d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3297b411b363SPhilipp Reisner 				*rule_nr = 36;
3298b411b363SPhilipp Reisner 			}
3299b411b363SPhilipp Reisner 
3300b411b363SPhilipp Reisner 			return 1;
3301b411b363SPhilipp Reisner 		}
3302b411b363SPhilipp Reisner 
3303b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3304b411b363SPhilipp Reisner 
330544a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33064a23f264SPhilipp Reisner 				return -1091;
3307b411b363SPhilipp Reisner 
3308b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3309b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3310d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3311b411b363SPhilipp Reisner 
3312b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3313b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3314b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3315b411b363SPhilipp Reisner 
3316b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3317b411b363SPhilipp Reisner 				*rule_nr = 35;
3318b411b363SPhilipp Reisner 			} else {
3319d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3320b411b363SPhilipp Reisner 				*rule_nr = 37;
3321b411b363SPhilipp Reisner 			}
3322b411b363SPhilipp Reisner 
3323b411b363SPhilipp Reisner 			return -1;
3324b411b363SPhilipp Reisner 		}
3325b411b363SPhilipp Reisner 
3326b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3327b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3328b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3329b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3330b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3331b411b363SPhilipp Reisner 		*rule_nr = 40;
3332b411b363SPhilipp Reisner 
3333f2d3d75bSLars Ellenberg 		/* Neither has the "crashed primary" flag set,
3334f2d3d75bSLars Ellenberg 		 * only a replication link hickup. */
3335f2d3d75bSLars Ellenberg 		if (rct == 0)
3336f2d3d75bSLars Ellenberg 			return 0;
3337f2d3d75bSLars Ellenberg 
3338f2d3d75bSLars Ellenberg 		/* Current UUID equal and no bitmap uuid; does not necessarily
3339f2d3d75bSLars Ellenberg 		 * mean this was a "simultaneous hard crash", maybe IO was
3340f2d3d75bSLars Ellenberg 		 * frozen, so no UUID-bump happened.
3341f2d3d75bSLars Ellenberg 		 * This is a protocol change, overload DRBD_FF_WSAME as flag
3342f2d3d75bSLars Ellenberg 		 * for "new-enough" peer DRBD version. */
3343f2d3d75bSLars Ellenberg 		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3344f2d3d75bSLars Ellenberg 			*rule_nr = 41;
3345f2d3d75bSLars Ellenberg 			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3346f2d3d75bSLars Ellenberg 				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3347f2d3d75bSLars Ellenberg 				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3348f2d3d75bSLars Ellenberg 			}
3349f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3350f2d3d75bSLars Ellenberg 				/* At least one has the "crashed primary" bit set,
3351f2d3d75bSLars Ellenberg 				 * both are primary now, but neither has rotated its UUIDs?
3352f2d3d75bSLars Ellenberg 				 * "Can not happen." */
3353f2d3d75bSLars Ellenberg 				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3354f2d3d75bSLars Ellenberg 				return -100;
3355f2d3d75bSLars Ellenberg 			}
3356f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY)
3357f2d3d75bSLars Ellenberg 				return 1;
3358f2d3d75bSLars Ellenberg 			return -1;
3359f2d3d75bSLars Ellenberg 		}
3360f2d3d75bSLars Ellenberg 
3361f2d3d75bSLars Ellenberg 		/* Both are secondary.
3362f2d3d75bSLars Ellenberg 		 * Really looks like recovery from simultaneous hard crash.
3363f2d3d75bSLars Ellenberg 		 * Check which had been primary before, and arbitrate. */
3364b411b363SPhilipp Reisner 		switch (rct) {
3365f2d3d75bSLars Ellenberg 		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3366b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3367b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3368b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
336944a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3370b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3371b411b363SPhilipp Reisner 		}
3372b411b363SPhilipp Reisner 	}
3373b411b363SPhilipp Reisner 
3374b411b363SPhilipp Reisner 	*rule_nr = 50;
3375b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3376b411b363SPhilipp Reisner 	if (self == peer)
3377b411b363SPhilipp Reisner 		return -1;
3378b411b363SPhilipp Reisner 
3379b411b363SPhilipp Reisner 	*rule_nr = 51;
3380b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3381b411b363SPhilipp Reisner 	if (self == peer) {
338244a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3383b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3384b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3385b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3386b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3387b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3388b411b363SPhilipp Reisner 
338944a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33904a23f264SPhilipp Reisner 				return -1091;
3391b411b363SPhilipp Reisner 
3392b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3393b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
33944a23f264SPhilipp Reisner 
3395d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3396b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
33974a23f264SPhilipp Reisner 
3398b411b363SPhilipp Reisner 			return -1;
3399b411b363SPhilipp Reisner 		}
3400b411b363SPhilipp Reisner 	}
3401b411b363SPhilipp Reisner 
3402b411b363SPhilipp Reisner 	*rule_nr = 60;
3403b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3404b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3405b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3406b411b363SPhilipp Reisner 		if (self == peer)
3407b411b363SPhilipp Reisner 			return -2;
3408b411b363SPhilipp Reisner 	}
3409b411b363SPhilipp Reisner 
3410b411b363SPhilipp Reisner 	*rule_nr = 70;
3411b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3412b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3413b411b363SPhilipp Reisner 	if (self == peer)
3414b411b363SPhilipp Reisner 		return 1;
3415b411b363SPhilipp Reisner 
3416b411b363SPhilipp Reisner 	*rule_nr = 71;
3417b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3418b411b363SPhilipp Reisner 	if (self == peer) {
341944a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3420b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3421b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3422b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3423b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3424b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3425b411b363SPhilipp Reisner 
342644a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
34274a23f264SPhilipp Reisner 				return -1091;
3428b411b363SPhilipp Reisner 
3429b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3430b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3431b411b363SPhilipp Reisner 
3432d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3433b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3434b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3435b411b363SPhilipp Reisner 
3436b411b363SPhilipp Reisner 			return 1;
3437b411b363SPhilipp Reisner 		}
3438b411b363SPhilipp Reisner 	}
3439b411b363SPhilipp Reisner 
3440b411b363SPhilipp Reisner 
3441b411b363SPhilipp Reisner 	*rule_nr = 80;
3442b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3443b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3444b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3445b411b363SPhilipp Reisner 		if (self == peer)
3446b411b363SPhilipp Reisner 			return 2;
3447b411b363SPhilipp Reisner 	}
3448b411b363SPhilipp Reisner 
3449b411b363SPhilipp Reisner 	*rule_nr = 90;
3450b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3451b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3452b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3453b411b363SPhilipp Reisner 		return 100;
3454b411b363SPhilipp Reisner 
3455b411b363SPhilipp Reisner 	*rule_nr = 100;
3456b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3457b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3458b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3459b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3460b411b363SPhilipp Reisner 			if (self == peer)
3461b411b363SPhilipp Reisner 				return -100;
3462b411b363SPhilipp Reisner 		}
3463b411b363SPhilipp Reisner 	}
3464b411b363SPhilipp Reisner 
3465b411b363SPhilipp Reisner 	return -1000;
3466b411b363SPhilipp Reisner }
3467b411b363SPhilipp Reisner 
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* Hold the uuid_lock so the UUIDs cannot change while we compare
	 * (and possibly correct) them. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	/* hg > 0: we become sync source; hg < 0: we become sync target;
	 * hg == 0: no resync needed.  |hg| >= 2 requests a full sync,
	 * |hg| == 100 signals split brain.  Special negative values
	 * (-1000, < -1000, < -0x10000) encode "unrelated data" and
	 * unmet protocol/feature requirements, handled below. */
	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* Encoded as -(0x10000 | proto | (feature flags << 8)):
	 * the peer is too old to resolve this; tell the admin what
	 * protocol version and feature flags would be required. */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* Encoded as -(1000 + required protocol version). */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side has an inconsistent disk, disk states
	 * override the UUID verdict: the consistent side becomes the
	 * sync source.  Keep the "full sync" magnitude (doubling) if
	 * the UUID comparison requested one or ended in split brain. */
	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	/* Snapshot the relevant net_conf settings under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* Attempt automatic split-brain recovery, using the policy
	 * configured for the current number of primaries (0, 1 or 2). */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: honor an explicit discard-my-data request --
	 * ours via the device flag, or (presumably) the peer's as
	 * signaled in p_uuid[UI_FLAGS] bit 0 -- but only if exactly one
	 * side asked for it. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* A primary with a usable disk that would become sync target:
	 * apply the configured rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			fallthrough;
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run mode: only report what would have happened, then bail. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| >= 2 means full sync: set all bits in the bitmap first. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3636b411b363SPhilipp Reisner 
3637f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3638b411b363SPhilipp Reisner {
3639b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3640f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3641f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3642b411b363SPhilipp Reisner 
3643b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3644f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3645f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3646b411b363SPhilipp Reisner 
3647b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3648f179d76dSPhilipp Reisner 	return peer;
3649b411b363SPhilipp Reisner }
3650b411b363SPhilipp Reisner 
/* Handle a P_PROTOCOL or P_PROTOCOL_UPDATE packet.
 *
 * For an initial P_PROTOCOL packet the peer's settings are compared
 * against our own net_conf; any mismatch drops the connection.  In
 * either case the received settings (including the peer's
 * data-integrity algorithm) are then installed into a new net_conf,
 * published via RCU.
 *
 * Returns 0 on success, or a negative error code (-EIO or a
 * drbd_recv_all() error) after requesting C_DISCONNECTING. */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Fixed part of the packet; all fields arrive in network byte order. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name follows as a
	 * variable-length payload; read and NUL-terminate it. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* Initial handshake only: verify the peer's settings match ours. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* Read our current net_conf under RCU for the comparison. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* after-sb policies are compared through convert_after_sb()
		 * so that discard-local/discard-remote pair up correctly. */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* discard-my-data may be set on at most one side. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			/* reset so the disconnect path does not free an ERR_PTR */
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Scratch buffers sized for the digest: one for the digest
		 * received from the peer, one for local verification. */
		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf)
		goto disconnect;

	/* Copy-modify-publish the net_conf, holding both the data mutex
	 * and the resource-wide conf_update mutex for the swap. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity tfm and digest buffers,
	 * releasing whatever was installed before. */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for all RCU readers of the old net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* Free anything allocated above that was not installed.
	 * crypto_free_shash(NULL) and kfree(NULL) are no-ops. */
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3799b411b363SPhilipp Reisner 
3800b411b363SPhilipp Reisner /* helper function
3801b411b363SPhilipp Reisner  * input: alg name, feature name
3802b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3803b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3804b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
38053d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
38063d0e6375SKees Cook 		const struct drbd_device *device,
3807b411b363SPhilipp Reisner 		const char *alg, const char *name)
3808b411b363SPhilipp Reisner {
38093d0e6375SKees Cook 	struct crypto_shash *tfm;
3810b411b363SPhilipp Reisner 
3811b411b363SPhilipp Reisner 	if (!alg[0])
3812b411b363SPhilipp Reisner 		return NULL;
3813b411b363SPhilipp Reisner 
38143d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3815b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3816d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3817b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3818b411b363SPhilipp Reisner 		return tfm;
3819b411b363SPhilipp Reisner 	}
3820b411b363SPhilipp Reisner 	return tfm;
3821b411b363SPhilipp Reisner }
3822b411b363SPhilipp Reisner 
3823bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3824b411b363SPhilipp Reisner {
3825bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
38264a76b161SAndreas Gruenbacher 	int size = pi->size;
38274a76b161SAndreas Gruenbacher 
38284a76b161SAndreas Gruenbacher 	while (size) {
38294a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3830bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38314a76b161SAndreas Gruenbacher 		if (s <= 0) {
38324a76b161SAndreas Gruenbacher 			if (s < 0)
38334a76b161SAndreas Gruenbacher 				return s;
38344a76b161SAndreas Gruenbacher 			break;
38354a76b161SAndreas Gruenbacher 		}
38364a76b161SAndreas Gruenbacher 		size -= s;
38374a76b161SAndreas Gruenbacher 	}
38384a76b161SAndreas Gruenbacher 	if (size)
38394a76b161SAndreas Gruenbacher 		return -EIO;
38404a76b161SAndreas Gruenbacher 	return 0;
38414a76b161SAndreas Gruenbacher }
38424a76b161SAndreas Gruenbacher 
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Drain the payload so the stream stays in sync for the next packet. */
	return ignore_remaining_packet(connection, pi);
}
38604a76b161SAndreas Gruenbacher 
3861bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38624a76b161SAndreas Gruenbacher {
38639f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3864b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3865e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3866b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38673d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38683d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38692ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3870813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3871bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3872813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38736a365874SStephen Kitt 	unsigned int fifo_size = 0;
387482bc0194SAndreas Gruenbacher 	int err;
3875b411b363SPhilipp Reisner 
38769f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38779f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3878bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38799f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3880b411b363SPhilipp Reisner 
3881b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3882b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3883b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
38848e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
38858e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3886b411b363SPhilipp Reisner 
3887e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3888d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3889e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
389082bc0194SAndreas Gruenbacher 		return -EIO;
3891b411b363SPhilipp Reisner 	}
3892b411b363SPhilipp Reisner 
3893b411b363SPhilipp Reisner 	if (apv <= 88) {
3894e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3895e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38968e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3897e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3898e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38990b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
39008e26f9ccSPhilipp Reisner 	} else {
3901e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3902e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39030b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3904b411b363SPhilipp Reisner 	}
3905b411b363SPhilipp Reisner 
3906b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3907e658983aSAndreas Gruenbacher 	p = pi->data;
390852a0cab3SKees Cook 	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
390952a0cab3SKees Cook 	memset(&p->algs, 0, sizeof(p->algs));
3910b411b363SPhilipp Reisner 
39119f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
391282bc0194SAndreas Gruenbacher 	if (err)
391382bc0194SAndreas Gruenbacher 		return err;
3914b411b363SPhilipp Reisner 
39150500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
39169f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3917b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3918daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3919daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3920b30ab791SAndreas Gruenbacher 			put_ldev(device);
39210500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3922d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3923daeda1ccSPhilipp Reisner 			return -ENOMEM;
3924f399002eSLars Ellenberg 		}
3925b411b363SPhilipp Reisner 
3926b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3927daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3928daeda1ccSPhilipp Reisner 
39296394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3930813472ceSPhilipp Reisner 	}
3931b411b363SPhilipp Reisner 
3932b411b363SPhilipp Reisner 	if (apv >= 88) {
3933b411b363SPhilipp Reisner 		if (apv == 88) {
39345de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3935d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39365de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3937b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3938813472ceSPhilipp Reisner 				err = -EIO;
3939813472ceSPhilipp Reisner 				goto reconnect;
3940b411b363SPhilipp Reisner 			}
3941b411b363SPhilipp Reisner 
39429f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3943813472ceSPhilipp Reisner 			if (err)
3944813472ceSPhilipp Reisner 				goto reconnect;
3945b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3946b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39470b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3948b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3949b411b363SPhilipp Reisner 
3950b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3951b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3952b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39530b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39540b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3955b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3956b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3957b411b363SPhilipp Reisner 		}
3958b411b363SPhilipp Reisner 
39592ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3960b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3961d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39622ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3963b411b363SPhilipp Reisner 				goto disconnect;
3964b411b363SPhilipp Reisner 			}
3965b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3966b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3967b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3968b411b363SPhilipp Reisner 				verify_tfm = NULL;
3969b411b363SPhilipp Reisner 				goto disconnect;
3970b411b363SPhilipp Reisner 			}
3971b411b363SPhilipp Reisner 		}
3972b411b363SPhilipp Reisner 
39732ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3974b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3975d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39762ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3977b411b363SPhilipp Reisner 				goto disconnect;
3978b411b363SPhilipp Reisner 			}
3979b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3980b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3981b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3982b411b363SPhilipp Reisner 				csums_tfm = NULL;
3983b411b363SPhilipp Reisner 				goto disconnect;
3984b411b363SPhilipp Reisner 			}
3985b411b363SPhilipp Reisner 		}
3986b411b363SPhilipp Reisner 
3987813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3988daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3989daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3990daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3991daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3992778f271dSPhilipp Reisner 
3993daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3994b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3995813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3996813472ceSPhilipp Reisner 				if (!new_plan) {
3997d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3998b30ab791SAndreas Gruenbacher 					put_ldev(device);
3999778f271dSPhilipp Reisner 					goto disconnect;
4000778f271dSPhilipp Reisner 				}
4001778f271dSPhilipp Reisner 			}
40028e26f9ccSPhilipp Reisner 		}
4003b411b363SPhilipp Reisner 
400491fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
40052ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
40068404e191SZhen Lei 			if (!new_net_conf)
400791fd4dadSPhilipp Reisner 				goto disconnect;
400891fd4dadSPhilipp Reisner 
40092ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
401091fd4dadSPhilipp Reisner 
4011b411b363SPhilipp Reisner 			if (verify_tfm) {
40122ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
40132ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
40143d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
40159f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
4016d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
4017b411b363SPhilipp Reisner 			}
4018b411b363SPhilipp Reisner 			if (csums_tfm) {
40192ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
40202ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
40213d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
40229f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
4023d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4024b411b363SPhilipp Reisner 			}
4025bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
4026778f271dSPhilipp Reisner 		}
4027b411b363SPhilipp Reisner 	}
4028b411b363SPhilipp Reisner 
4029813472ceSPhilipp Reisner 	if (new_disk_conf) {
4030b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4031b30ab791SAndreas Gruenbacher 		put_ldev(device);
4032b411b363SPhilipp Reisner 	}
4033813472ceSPhilipp Reisner 
4034813472ceSPhilipp Reisner 	if (new_plan) {
4035b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4036b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4037813472ceSPhilipp Reisner 	}
4038daeda1ccSPhilipp Reisner 
40390500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4040daeda1ccSPhilipp Reisner 	synchronize_rcu();
4041daeda1ccSPhilipp Reisner 	if (new_net_conf)
4042daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4043daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4044813472ceSPhilipp Reisner 	kfree(old_plan);
4045daeda1ccSPhilipp Reisner 
404682bc0194SAndreas Gruenbacher 	return 0;
4047b411b363SPhilipp Reisner 
4048813472ceSPhilipp Reisner reconnect:
4049813472ceSPhilipp Reisner 	if (new_disk_conf) {
4050b30ab791SAndreas Gruenbacher 		put_ldev(device);
4051813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4052813472ceSPhilipp Reisner 	}
40530500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4054813472ceSPhilipp Reisner 	return -EIO;
4055813472ceSPhilipp Reisner 
4056b411b363SPhilipp Reisner disconnect:
4057813472ceSPhilipp Reisner 	kfree(new_plan);
4058813472ceSPhilipp Reisner 	if (new_disk_conf) {
4059b30ab791SAndreas Gruenbacher 		put_ldev(device);
4060813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4061813472ceSPhilipp Reisner 	}
40620500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4063b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4064b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40653d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4066b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40673d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40689f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
406982bc0194SAndreas Gruenbacher 	return -EIO;
4070b411b363SPhilipp Reisner }
4071b411b363SPhilipp Reisner 
4072b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4073b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4074b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4075b411b363SPhilipp Reisner {
4076b411b363SPhilipp Reisner 	sector_t d;
4077b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4078b411b363SPhilipp Reisner 		return;
4079b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4080b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4081d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4082b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4083b411b363SPhilipp Reisner }
4084b411b363SPhilipp Reisner 
/*
 * receive_sizes()  -  process a P_SIZES packet from the peer
 *
 * Reconciles our view of the device size with the peer's: possibly adopts
 * a new user-requested size (u_size), resizes the local backing device,
 * or — while diskless — accepts the peer's current size.  May trigger a
 * resync after an online grow, or disconnect on a fatal size mismatch.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO on a size
 * conflict (the connection is then forced to C_DISCONNECTING).
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* Queue limit info is only on the wire if the WSAME feature was agreed. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = get_capacity(device->vdisk);

	/* Peer's backing disk size, user-configured size and current size. */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		/* Adopt the peer's user-requested size via an RCU-published
		 * replacement of disk_conf. */
		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			/* Wait out RCU readers of the old conf before freeing it. */
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore; the peer apparently knows nothing about sizes either. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* Remember the backing device's size so a later change is noticed. */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
		    ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4256b411b363SPhilipp Reisner 
4257bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4258b411b363SPhilipp Reisner {
42599f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4260b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4261e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4262b411b363SPhilipp Reisner 	u64 *p_uuid;
426362b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4264b411b363SPhilipp Reisner 
42659f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42669f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4267bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42689f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42694a76b161SAndreas Gruenbacher 
4270365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42718404e191SZhen Lei 	if (!p_uuid)
4272063eacf8SJing Wang 		return false;
4273b411b363SPhilipp Reisner 
4274b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4275b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4276b411b363SPhilipp Reisner 
4277b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4278b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4279b411b363SPhilipp Reisner 
4280b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4281b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4282b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4283b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4284d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4285b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
42869f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
428782bc0194SAndreas Gruenbacher 		return -EIO;
4288b411b363SPhilipp Reisner 	}
4289b411b363SPhilipp Reisner 
4290b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4291b411b363SPhilipp Reisner 		int skip_initial_sync =
4292b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
42939f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4294b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4295b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4296b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4297d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4298b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
429920ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
430020ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4301b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4302b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4303b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4304b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4305b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
430662b0da3aSLars Ellenberg 			updated_uuids = 1;
4307b411b363SPhilipp Reisner 		}
4308b30ab791SAndreas Gruenbacher 		put_ldev(device);
4309b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4310b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
431118a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
431218a50fa2SPhilipp Reisner 		   for me. */
4313b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4314b411b363SPhilipp Reisner 	}
4315b411b363SPhilipp Reisner 
4316b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4317b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4318b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4319b411b363SPhilipp Reisner 	   new disk state... */
4320b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4321b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4322b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4323b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
432462b0da3aSLars Ellenberg 
432562b0da3aSLars Ellenberg 	if (updated_uuids)
4326b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4327b411b363SPhilipp Reisner 
432882bc0194SAndreas Gruenbacher 	return 0;
4329b411b363SPhilipp Reisner }
4330b411b363SPhilipp Reisner 
4331b411b363SPhilipp Reisner /**
4332b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4333b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4334b411b363SPhilipp Reisner  */
4335b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4336b411b363SPhilipp Reisner {
4337b411b363SPhilipp Reisner 	union drbd_state ms;
4338b411b363SPhilipp Reisner 
4339b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4340369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4341b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4342b411b363SPhilipp Reisner 
4343b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4344b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4345b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4346b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4347b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4348b411b363SPhilipp Reisner 	};
4349b411b363SPhilipp Reisner 
4350b411b363SPhilipp Reisner 	ms.i = ps.i;
4351b411b363SPhilipp Reisner 
4352b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4353b411b363SPhilipp Reisner 	ms.peer = ps.role;
4354b411b363SPhilipp Reisner 	ms.role = ps.peer;
4355b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4356b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4357b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4358b411b363SPhilipp Reisner 
4359b411b363SPhilipp Reisner 	return ms;
4360b411b363SPhilipp Reisner }
4361b411b363SPhilipp Reisner 
4362bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4363b411b363SPhilipp Reisner {
43649f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4365b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4366e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4367b411b363SPhilipp Reisner 	union drbd_state mask, val;
4368bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4369b411b363SPhilipp Reisner 
43709f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43719f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43724a76b161SAndreas Gruenbacher 		return -EIO;
43739f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43744a76b161SAndreas Gruenbacher 
4375b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4376b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4377b411b363SPhilipp Reisner 
43789f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4379b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
438069a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
438182bc0194SAndreas Gruenbacher 		return 0;
4382b411b363SPhilipp Reisner 	}
4383b411b363SPhilipp Reisner 
4384b411b363SPhilipp Reisner 	mask = convert_state(mask);
4385b411b363SPhilipp Reisner 	val = convert_state(val);
4386b411b363SPhilipp Reisner 
4387b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
438869a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4389047cd4a6SPhilipp Reisner 
4390b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4391b411b363SPhilipp Reisner 
439282bc0194SAndreas Gruenbacher 	return 0;
4393b411b363SPhilipp Reisner }
4394b411b363SPhilipp Reisner 
4395bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4396b411b363SPhilipp Reisner {
4397e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4398dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4399dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4400dfafcc8aSPhilipp Reisner 
4401dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4402dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4403dfafcc8aSPhilipp Reisner 
4404bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4405bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4406bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
440782bc0194SAndreas Gruenbacher 		return 0;
4408dfafcc8aSPhilipp Reisner 	}
4409dfafcc8aSPhilipp Reisner 
4410dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4411dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4412dfafcc8aSPhilipp Reisner 
4413bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4414bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4415dfafcc8aSPhilipp Reisner 
441682bc0194SAndreas Gruenbacher 	return 0;
4417dfafcc8aSPhilipp Reisner }
4418dfafcc8aSPhilipp Reisner 
/*
 * receive_state() - handle a P_STATE packet carrying the peer's drbd state.
 *
 * Reconciles the received peer state with our own: it may finish a running
 * resync or verify, run the resync handshake to decide on a new connection
 * state, and finally commits the combined state under req_lock.  The
 * read-modify-write of the local state is retried (see the "retry" label)
 * if the local state changed concurrently.
 *
 * Returns 0 on success, -ECONNRESET if the connection is already being
 * torn down, and -EIO to make the caller drop the connection.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	/* the state on the wire is the peer's point of view */
	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* the peer is still attaching; derive its effective disk state
		 * from the inconsistent-flag in the UUID flags it sent earlier */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* snapshot the local state under the lock; ns starts as a copy of os
	 * and is modified below, os is used to detect concurrent changes */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer throttled itself ahead of us; we become the lagging side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids().
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake() signals "no agreement" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* local state changed while we were not holding the lock: start over */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	/* only a fresh connect may fail "softly" (handled via rv below);
	 * all later state updates are forced through with CS_HARD */
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	/* re-read: _drbd_set_state() may have adjusted what we asked for */
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4615b411b363SPhilipp Reisner 
4616bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4617b411b363SPhilipp Reisner {
46189f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4619b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4620e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
46214a76b161SAndreas Gruenbacher 
46229f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
46239f4fe9adSAndreas Gruenbacher 	if (!peer_device)
46244a76b161SAndreas Gruenbacher 		return -EIO;
46259f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4626b411b363SPhilipp Reisner 
4627b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4628b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4629b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4630b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4631b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4632b411b363SPhilipp Reisner 
46330b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4634b411b363SPhilipp Reisner 
4635b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4636b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4637b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4638b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4639b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4640b411b363SPhilipp Reisner 
4641b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4642b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4643b411b363SPhilipp Reisner 
4644b30ab791SAndreas Gruenbacher 		put_ldev(device);
4645b411b363SPhilipp Reisner 	} else
4646d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4647b411b363SPhilipp Reisner 
464882bc0194SAndreas Gruenbacher 	return 0;
4649b411b363SPhilipp Reisner }
4650b411b363SPhilipp Reisner 
46519b48ff07SLee Jones /*
46522c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46532c46407dSAndreas Gruenbacher  *
46542c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46552c46407dSAndreas Gruenbacher  * code upon failure.
46562c46407dSAndreas Gruenbacher  */
46572c46407dSAndreas Gruenbacher static int
465869a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4659e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4660b411b363SPhilipp Reisner {
466150d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
466269a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4663e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
466450d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4665e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46662c46407dSAndreas Gruenbacher 	int err;
4667b411b363SPhilipp Reisner 
466850d0b1adSAndreas Gruenbacher 	if (want != size) {
466969a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46702c46407dSAndreas Gruenbacher 		return -EIO;
4671b411b363SPhilipp Reisner 	}
4672b411b363SPhilipp Reisner 	if (want == 0)
46732c46407dSAndreas Gruenbacher 		return 0;
467469a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
467582bc0194SAndreas Gruenbacher 	if (err)
46762c46407dSAndreas Gruenbacher 		return err;
4677b411b363SPhilipp Reisner 
467869a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4679b411b363SPhilipp Reisner 
4680b411b363SPhilipp Reisner 	c->word_offset += num_words;
4681b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4682b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4683b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4684b411b363SPhilipp Reisner 
46852c46407dSAndreas Gruenbacher 	return 1;
4686b411b363SPhilipp Reisner }
4687b411b363SPhilipp Reisner 
4688a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4689a02d1240SAndreas Gruenbacher {
4690a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4691a02d1240SAndreas Gruenbacher }
4692a02d1240SAndreas Gruenbacher 
4693a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4694a02d1240SAndreas Gruenbacher {
4695a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4696a02d1240SAndreas Gruenbacher }
4697a02d1240SAndreas Gruenbacher 
4698a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4699a02d1240SAndreas Gruenbacher {
4700a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4701a02d1240SAndreas Gruenbacher }
4702a02d1240SAndreas Gruenbacher 
/*
 * recv_bm_rle_bits
 *
 * Decode a VLI/RLE compressed bitmap chunk of @len bytes from @p and set
 * the corresponding bits (runs with toggle == 1) in the local bitmap,
 * starting at the bit offset recorded in @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* sliding 64-bit decode window */
	u64 rl;			/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* does the first run describe set bits? */
	int have;		/* valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* runs alternate between clear and set bits; toggle flips each turn */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoded code word must fit in what we had buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bit stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done only once the runs cover the whole bitmap */
	return (s != c->bm_bits);
}
4771b411b363SPhilipp Reisner 
47729b48ff07SLee Jones /*
47732c46407dSAndreas Gruenbacher  * decode_bitmap_c
47742c46407dSAndreas Gruenbacher  *
47752c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47762c46407dSAndreas Gruenbacher  * code upon failure.
47772c46407dSAndreas Gruenbacher  */
47782c46407dSAndreas Gruenbacher static int
477969a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4780b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4781c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4782c6d25cfeSPhilipp Reisner 		unsigned int len)
4783b411b363SPhilipp Reisner {
4784a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
478569a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4786b411b363SPhilipp Reisner 
4787b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4788b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4789b411b363SPhilipp Reisner 	 * during all our tests. */
4790b411b363SPhilipp Reisner 
479169a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
479269a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
47932c46407dSAndreas Gruenbacher 	return -EIO;
4794b411b363SPhilipp Reisner }
4795b411b363SPhilipp Reisner 
4796b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4797b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4798b411b363SPhilipp Reisner {
4799b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4800a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
480150d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
480250d0b1adSAndreas Gruenbacher 	unsigned int plain =
480350d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
480450d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
480550d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
480650d0b1adSAndreas Gruenbacher 	unsigned int r;
4807b411b363SPhilipp Reisner 
4808b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4809b411b363SPhilipp Reisner 	if (total == 0)
4810b411b363SPhilipp Reisner 		return;
4811b411b363SPhilipp Reisner 
4812b411b363SPhilipp Reisner 	/* don't report if not compressed */
4813b411b363SPhilipp Reisner 	if (total >= plain)
4814b411b363SPhilipp Reisner 		return;
4815b411b363SPhilipp Reisner 
4816b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4817b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4818b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4819b411b363SPhilipp Reisner 
4820b411b363SPhilipp Reisner 	if (r > 1000)
4821b411b363SPhilipp Reisner 		r = 1000;
4822b411b363SPhilipp Reisner 
4823b411b363SPhilipp Reisner 	r = 1000 - r;
4824d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4825b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4826b411b363SPhilipp Reisner 			direction,
4827b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4828b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4829b411b363SPhilipp Reisner 			total, r/10, r % 10);
4830b411b363SPhilipp Reisner }
4831b411b363SPhilipp Reisner 
4832b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4833b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4834b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4835b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4836b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4837b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4838b411b363SPhilipp Reisner 
4839b411b363SPhilipp Reisner    returns 0 on failure, 1 if we successfully received it. */
4840bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4841b411b363SPhilipp Reisner {
48429f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4843b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4844b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
48452c46407dSAndreas Gruenbacher 	int err;
48464a76b161SAndreas Gruenbacher 
48479f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
48489f4fe9adSAndreas Gruenbacher 	if (!peer_device)
48494a76b161SAndreas Gruenbacher 		return -EIO;
48509f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4851b411b363SPhilipp Reisner 
4852b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
485320ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
485420ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4855b411b363SPhilipp Reisner 
4856b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4857b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4858b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4859b411b363SPhilipp Reisner 	};
4860b411b363SPhilipp Reisner 
48612c46407dSAndreas Gruenbacher 	for(;;) {
4862e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
486369a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4864e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4865b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4866b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4867e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4868b411b363SPhilipp Reisner 
4869bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4870d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
487182bc0194SAndreas Gruenbacher 				err = -EIO;
4872b411b363SPhilipp Reisner 				goto out;
4873b411b363SPhilipp Reisner 			}
4874e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4875d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
487682bc0194SAndreas Gruenbacher 				err = -EIO;
487778fcbdaeSAndreas Gruenbacher 				goto out;
4878b411b363SPhilipp Reisner 			}
48799f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4880e658983aSAndreas Gruenbacher 			if (err)
4881e658983aSAndreas Gruenbacher 			       goto out;
488269a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4883b411b363SPhilipp Reisner 		} else {
4884d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
488582bc0194SAndreas Gruenbacher 			err = -EIO;
4886b411b363SPhilipp Reisner 			goto out;
4887b411b363SPhilipp Reisner 		}
4888b411b363SPhilipp Reisner 
4889e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4890bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4891b411b363SPhilipp Reisner 
48922c46407dSAndreas Gruenbacher 		if (err <= 0) {
48932c46407dSAndreas Gruenbacher 			if (err < 0)
48942c46407dSAndreas Gruenbacher 				goto out;
4895b411b363SPhilipp Reisner 			break;
48962c46407dSAndreas Gruenbacher 		}
48979f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
489882bc0194SAndreas Gruenbacher 		if (err)
4899b411b363SPhilipp Reisner 			goto out;
49002c46407dSAndreas Gruenbacher 	}
4901b411b363SPhilipp Reisner 
4902b30ab791SAndreas Gruenbacher 	INFO_bm_xfer_stats(device, "receive", &c);
4903b411b363SPhilipp Reisner 
4904b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4905de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4906de1f8e4aSAndreas Gruenbacher 
4907b30ab791SAndreas Gruenbacher 		err = drbd_send_bitmap(device);
490882bc0194SAndreas Gruenbacher 		if (err)
4909b411b363SPhilipp Reisner 			goto out;
4910b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4911b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
49120b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4913b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4914b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4915b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4916d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4917b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4918b411b363SPhilipp Reisner 	}
491982bc0194SAndreas Gruenbacher 	err = 0;
4920b411b363SPhilipp Reisner 
4921b411b363SPhilipp Reisner  out:
4922b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4923b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4924b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
492582bc0194SAndreas Gruenbacher 	return err;
4926b411b363SPhilipp Reisner }
4927b411b363SPhilipp Reisner 
4928bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4929b411b363SPhilipp Reisner {
49301ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4931e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4932b411b363SPhilipp Reisner 
4933bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4934b411b363SPhilipp Reisner }
4935b411b363SPhilipp Reisner 
4936bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4937b411b363SPhilipp Reisner {
4938b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4939b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4940ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
494182bc0194SAndreas Gruenbacher 	return 0;
4942b411b363SPhilipp Reisner }
4943b411b363SPhilipp Reisner 
4944bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
494573a01a18SPhilipp Reisner {
49469f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4947b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4948e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49494a76b161SAndreas Gruenbacher 
49509f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49519f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49524a76b161SAndreas Gruenbacher 		return -EIO;
49539f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
495473a01a18SPhilipp Reisner 
4955b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4956f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4957f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4958f735e363SLars Ellenberg 	case C_BEHIND:
4959f735e363SLars Ellenberg 			break;
4960f735e363SLars Ellenberg 	default:
4961d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4962b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4963f735e363SLars Ellenberg 	}
4964f735e363SLars Ellenberg 
4965b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
496673a01a18SPhilipp Reisner 
496782bc0194SAndreas Gruenbacher 	return 0;
496873a01a18SPhilipp Reisner }
496973a01a18SPhilipp Reisner 
4970700ca8c0SPhilipp Reisner static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
4971700ca8c0SPhilipp Reisner {
4972700ca8c0SPhilipp Reisner 	struct drbd_peer_device *peer_device;
4973700ca8c0SPhilipp Reisner 	struct p_block_desc *p = pi->data;
4974700ca8c0SPhilipp Reisner 	struct drbd_device *device;
4975700ca8c0SPhilipp Reisner 	sector_t sector;
4976700ca8c0SPhilipp Reisner 	int size, err = 0;
4977700ca8c0SPhilipp Reisner 
4978700ca8c0SPhilipp Reisner 	peer_device = conn_peer_device(connection, pi->vnr);
4979700ca8c0SPhilipp Reisner 	if (!peer_device)
4980700ca8c0SPhilipp Reisner 		return -EIO;
4981700ca8c0SPhilipp Reisner 	device = peer_device->device;
4982700ca8c0SPhilipp Reisner 
4983700ca8c0SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
4984700ca8c0SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
4985700ca8c0SPhilipp Reisner 
4986700ca8c0SPhilipp Reisner 	dec_rs_pending(device);
4987700ca8c0SPhilipp Reisner 
4988700ca8c0SPhilipp Reisner 	if (get_ldev(device)) {
4989700ca8c0SPhilipp Reisner 		struct drbd_peer_request *peer_req;
499045c21793SChristoph Hellwig 		const int op = REQ_OP_WRITE_ZEROES;
4991700ca8c0SPhilipp Reisner 
4992700ca8c0SPhilipp Reisner 		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
49939104d31aSLars Ellenberg 					       size, 0, GFP_NOIO);
4994700ca8c0SPhilipp Reisner 		if (!peer_req) {
4995700ca8c0SPhilipp Reisner 			put_ldev(device);
4996700ca8c0SPhilipp Reisner 			return -ENOMEM;
4997700ca8c0SPhilipp Reisner 		}
4998700ca8c0SPhilipp Reisner 
4999700ca8c0SPhilipp Reisner 		peer_req->w.cb = e_end_resync_block;
5000700ca8c0SPhilipp Reisner 		peer_req->submit_jif = jiffies;
5001f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
5002700ca8c0SPhilipp Reisner 
5003700ca8c0SPhilipp Reisner 		spin_lock_irq(&device->resource->req_lock);
5004700ca8c0SPhilipp Reisner 		list_add_tail(&peer_req->w.list, &device->sync_ee);
5005700ca8c0SPhilipp Reisner 		spin_unlock_irq(&device->resource->req_lock);
5006700ca8c0SPhilipp Reisner 
5007700ca8c0SPhilipp Reisner 		atomic_add(pi->size >> 9, &device->rs_sect_ev);
5008700ca8c0SPhilipp Reisner 		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
5009700ca8c0SPhilipp Reisner 
5010700ca8c0SPhilipp Reisner 		if (err) {
5011700ca8c0SPhilipp Reisner 			spin_lock_irq(&device->resource->req_lock);
5012700ca8c0SPhilipp Reisner 			list_del(&peer_req->w.list);
5013700ca8c0SPhilipp Reisner 			spin_unlock_irq(&device->resource->req_lock);
5014700ca8c0SPhilipp Reisner 
5015700ca8c0SPhilipp Reisner 			drbd_free_peer_req(device, peer_req);
5016700ca8c0SPhilipp Reisner 			put_ldev(device);
5017700ca8c0SPhilipp Reisner 			err = 0;
5018700ca8c0SPhilipp Reisner 			goto fail;
5019700ca8c0SPhilipp Reisner 		}
5020700ca8c0SPhilipp Reisner 
5021700ca8c0SPhilipp Reisner 		inc_unacked(device);
5022700ca8c0SPhilipp Reisner 
5023700ca8c0SPhilipp Reisner 		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
5024700ca8c0SPhilipp Reisner 		   as well as drbd_rs_complete_io() */
5025700ca8c0SPhilipp Reisner 	} else {
5026700ca8c0SPhilipp Reisner 	fail:
5027700ca8c0SPhilipp Reisner 		drbd_rs_complete_io(device, sector);
5028700ca8c0SPhilipp Reisner 		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
5029700ca8c0SPhilipp Reisner 	}
5030700ca8c0SPhilipp Reisner 
5031700ca8c0SPhilipp Reisner 	atomic_add(size >> 9, &device->rs_sect_in);
5032700ca8c0SPhilipp Reisner 
5033700ca8c0SPhilipp Reisner 	return err;
5034700ca8c0SPhilipp Reisner }
5035700ca8c0SPhilipp Reisner 
503602918be2SPhilipp Reisner struct data_cmd {
503702918be2SPhilipp Reisner 	int expect_payload;
50389104d31aSLars Ellenberg 	unsigned int pkt_size;
5039bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *, struct packet_info *);
5040b411b363SPhilipp Reisner };
5041b411b363SPhilipp Reisner 
504202918be2SPhilipp Reisner static struct data_cmd drbd_cmd_handler[] = {
504302918be2SPhilipp Reisner 	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
504402918be2SPhilipp Reisner 	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
504502918be2SPhilipp Reisner 	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
504602918be2SPhilipp Reisner 	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
5047e658983aSAndreas Gruenbacher 	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
5048e658983aSAndreas Gruenbacher 	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
5049e658983aSAndreas Gruenbacher 	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
505002918be2SPhilipp Reisner 	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
505102918be2SPhilipp Reisner 	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
5052e658983aSAndreas Gruenbacher 	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
5053e658983aSAndreas Gruenbacher 	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
505402918be2SPhilipp Reisner 	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
505502918be2SPhilipp Reisner 	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
505602918be2SPhilipp Reisner 	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
505702918be2SPhilipp Reisner 	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
505802918be2SPhilipp Reisner 	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
505902918be2SPhilipp Reisner 	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
506002918be2SPhilipp Reisner 	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
506102918be2SPhilipp Reisner 	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
506202918be2SPhilipp Reisner 	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
5063700ca8c0SPhilipp Reisner 	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
506402918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
506573a01a18SPhilipp Reisner 	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
50664a76b161SAndreas Gruenbacher 	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
5067036b17eaSPhilipp Reisner 	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
5068a0fb3c47SLars Ellenberg 	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
5069f31e583aSLars Ellenberg 	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
5070700ca8c0SPhilipp Reisner 	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
50719104d31aSLars Ellenberg 	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
507202918be2SPhilipp Reisner };
507302918be2SPhilipp Reisner 
5074bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5075b411b363SPhilipp Reisner {
507677351055SPhilipp Reisner 	struct packet_info pi;
507702918be2SPhilipp Reisner 	size_t shs; /* sub header size */
507882bc0194SAndreas Gruenbacher 	int err;
5079b411b363SPhilipp Reisner 
5080bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
50819104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5082deebe195SAndreas Gruenbacher 
5083bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5084c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5085c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
508602918be2SPhilipp Reisner 			goto err_out;
508702918be2SPhilipp Reisner 
5088deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
50894a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
50901ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
50912fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
509202918be2SPhilipp Reisner 			goto err_out;
50930b33a916SLars Ellenberg 		}
5094b411b363SPhilipp Reisner 
5095e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
50969104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
50979104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5098e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
50991ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
51002fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5101c13f7e1aSLars Ellenberg 			goto err_out;
5102c13f7e1aSLars Ellenberg 		}
51039104d31aSLars Ellenberg 		if (pi.size < shs) {
51049104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
51059104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
51069104d31aSLars Ellenberg 			goto err_out;
51079104d31aSLars Ellenberg 		}
5108c13f7e1aSLars Ellenberg 
5109c13f7e1aSLars Ellenberg 		if (shs) {
5110944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5111bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5112a5c31904SAndreas Gruenbacher 			if (err)
511302918be2SPhilipp Reisner 				goto err_out;
5114e2857216SAndreas Gruenbacher 			pi.size -= shs;
5115b411b363SPhilipp Reisner 		}
511602918be2SPhilipp Reisner 
5117944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5118bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
51194a76b161SAndreas Gruenbacher 		if (err) {
51201ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
51219f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
512202918be2SPhilipp Reisner 			goto err_out;
512302918be2SPhilipp Reisner 		}
512402918be2SPhilipp Reisner 	}
512582bc0194SAndreas Gruenbacher 	return;
512602918be2SPhilipp Reisner 
512702918be2SPhilipp Reisner     err_out:
5128bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5129b411b363SPhilipp Reisner }
5130b411b363SPhilipp Reisner 
5131bde89a9eSAndreas Gruenbacher static void conn_disconnect(struct drbd_connection *connection)
5132f70b3511SPhilipp Reisner {
5133c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5134bbeb641cSPhilipp Reisner 	enum drbd_conns oc;
5135376694a0SPhilipp Reisner 	int vnr;
5136f70b3511SPhilipp Reisner 
5137bde89a9eSAndreas Gruenbacher 	if (connection->cstate == C_STANDALONE)
5138b411b363SPhilipp Reisner 		return;
5139b411b363SPhilipp Reisner 
5140545752d5SLars Ellenberg 	/* We are about to start the cleanup after connection loss.
5141545752d5SLars Ellenberg 	 * Make sure drbd_make_request knows about that.
5142545752d5SLars Ellenberg 	 * Usually we should be in some network failure state already,
5143545752d5SLars Ellenberg 	 * but just in case we are not, we fix it up here.
5144545752d5SLars Ellenberg 	 */
5145bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5146545752d5SLars Ellenberg 
5147668700b4SPhilipp Reisner 	/* ack_receiver does not clean up anything. it must not interfere, either */
51481c03e520SPhilipp Reisner 	drbd_thread_stop(&connection->ack_receiver);
5149668700b4SPhilipp Reisner 	if (connection->ack_sender) {
5150668700b4SPhilipp Reisner 		destroy_workqueue(connection->ack_sender);
5151668700b4SPhilipp Reisner 		connection->ack_sender = NULL;
5152668700b4SPhilipp Reisner 	}
5153bde89a9eSAndreas Gruenbacher 	drbd_free_sock(connection);
5154360cc740SPhilipp Reisner 
5155c141ebdaSPhilipp Reisner 	rcu_read_lock();
5156c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5157c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5158b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
5159c141ebdaSPhilipp Reisner 		rcu_read_unlock();
516069a22773SAndreas Gruenbacher 		drbd_disconnected(peer_device);
5161c06ece6bSAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
5162c141ebdaSPhilipp Reisner 		rcu_read_lock();
5163c141ebdaSPhilipp Reisner 	}
5164c141ebdaSPhilipp Reisner 	rcu_read_unlock();
5165c141ebdaSPhilipp Reisner 
5166bde89a9eSAndreas Gruenbacher 	if (!list_empty(&connection->current_epoch->list))
51671ec861ebSAndreas Gruenbacher 		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
516812038a3aSPhilipp Reisner 	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
5169bde89a9eSAndreas Gruenbacher 	atomic_set(&connection->current_epoch->epoch_size, 0);
5170bde89a9eSAndreas Gruenbacher 	connection->send.seen_any_write_yet = false;
517112038a3aSPhilipp Reisner 
51721ec861ebSAndreas Gruenbacher 	drbd_info(connection, "Connection closed\n");
5173360cc740SPhilipp Reisner 
5174bde89a9eSAndreas Gruenbacher 	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
5175bde89a9eSAndreas Gruenbacher 		conn_try_outdate_peer_async(connection);
5176cb703454SPhilipp Reisner 
51770500813fSAndreas Gruenbacher 	spin_lock_irq(&connection->resource->req_lock);
5178bde89a9eSAndreas Gruenbacher 	oc = connection->cstate;
5179bbeb641cSPhilipp Reisner 	if (oc >= C_UNCONNECTED)
5180bde89a9eSAndreas Gruenbacher 		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
5181bbeb641cSPhilipp Reisner 
51820500813fSAndreas Gruenbacher 	spin_unlock_irq(&connection->resource->req_lock);
5183360cc740SPhilipp Reisner 
5184f3dfa40aSLars Ellenberg 	if (oc == C_DISCONNECTING)
5185bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
5186360cc740SPhilipp Reisner }
5187360cc740SPhilipp Reisner 
518869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *peer_device)
5189360cc740SPhilipp Reisner {
519069a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
5191360cc740SPhilipp Reisner 	unsigned int i;
5192b411b363SPhilipp Reisner 
519385719573SPhilipp Reisner 	/* wait for current activity to cease. */
51940500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5195b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->active_ee);
5196b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->sync_ee);
5197b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->read_ee);
51980500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5199b411b363SPhilipp Reisner 
5200b411b363SPhilipp Reisner 	/* We do not have data structures that would allow us to
5201b411b363SPhilipp Reisner 	 * get the rs_pending_cnt down to 0 again.
5202b411b363SPhilipp Reisner 	 *  * On C_SYNC_TARGET we do not have any data structures describing
5203b411b363SPhilipp Reisner 	 *    the pending RSDataRequest's we have sent.
5204b411b363SPhilipp Reisner 	 *  * On C_SYNC_SOURCE there is no data structure that tracks
5205b411b363SPhilipp Reisner 	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
5206b411b363SPhilipp Reisner 	 *  And no, it is not the sum of the reference counts in the
5207b411b363SPhilipp Reisner 	 *  resync_LRU. The resync_LRU tracks the whole operation including
5208b411b363SPhilipp Reisner 	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
5209b411b363SPhilipp Reisner 	 *  on the fly. */
5210b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5211b30ab791SAndreas Gruenbacher 	device->rs_total = 0;
5212b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
5213b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_pending_cnt, 0);
5214b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
5215b411b363SPhilipp Reisner 
5216b30ab791SAndreas Gruenbacher 	del_timer_sync(&device->resync_timer);
52172bccef39SKees Cook 	resync_timer_fn(&device->resync_timer);
5218b411b363SPhilipp Reisner 
5219b411b363SPhilipp Reisner 	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
5220b411b363SPhilipp Reisner 	 * w_make_resync_request etc. which may still be on the worker queue
5221b411b363SPhilipp Reisner 	 * to be "canceled" */
5222b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5223b411b363SPhilipp Reisner 
5224b30ab791SAndreas Gruenbacher 	drbd_finish_peer_reqs(device);
5225b411b363SPhilipp Reisner 
5226d10b4ea3SPhilipp Reisner 	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
5227d10b4ea3SPhilipp Reisner 	   might have issued a work again. The one before drbd_finish_peer_reqs() is
5228d10b4ea3SPhilipp Reisner 	   necessary to reclain net_ee in drbd_finish_peer_reqs(). */
5229b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5230d10b4ea3SPhilipp Reisner 
523108332d73SLars Ellenberg 	/* need to do it again, drbd_finish_peer_reqs() may have populated it
523208332d73SLars Ellenberg 	 * again via drbd_try_clear_on_disk_bm(). */
5233b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5234b411b363SPhilipp Reisner 
5235b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
5236b30ab791SAndreas Gruenbacher 	device->p_uuid = NULL;
5237b411b363SPhilipp Reisner 
5238b30ab791SAndreas Gruenbacher 	if (!drbd_suspended(device))
523969a22773SAndreas Gruenbacher 		tl_clear(peer_device->connection);
5240b411b363SPhilipp Reisner 
5241b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
5242b411b363SPhilipp Reisner 
5243be115b69SLars Ellenberg 	if (get_ldev(device)) {
5244be115b69SLars Ellenberg 		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
5245be115b69SLars Ellenberg 				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
5246be115b69SLars Ellenberg 		put_ldev(device);
5247be115b69SLars Ellenberg 	}
524820ceb2b2SLars Ellenberg 
5249b411b363SPhilipp Reisner 	/* tcp_close and release of sendpage pages can be deferred.  I don't
5250b411b363SPhilipp Reisner 	 * want to use SO_LINGER, because apparently it can be deferred for
5251b411b363SPhilipp Reisner 	 * more than 20 seconds (longest time I checked).
5252b411b363SPhilipp Reisner 	 *
5253b411b363SPhilipp Reisner 	 * Actually we don't care for exactly when the network stack does its
5254b411b363SPhilipp Reisner 	 * put_page(), but release our reference on these pages right here.
5255b411b363SPhilipp Reisner 	 */
5256b30ab791SAndreas Gruenbacher 	i = drbd_free_peer_reqs(device, &device->net_ee);
5257b411b363SPhilipp Reisner 	if (i)
5258d0180171SAndreas Gruenbacher 		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
5259b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use_by_net);
5260435f0740SLars Ellenberg 	if (i)
5261d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
5262b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use);
5263b411b363SPhilipp Reisner 	if (i)
5264d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use = %d, expected 0\n", i);
5265b411b363SPhilipp Reisner 
52660b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->read_ee));
52670b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->active_ee));
52680b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->sync_ee));
52690b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->done_ee));
5270b411b363SPhilipp Reisner 
5271360cc740SPhilipp Reisner 	return 0;
5272b411b363SPhilipp Reisner }
5273b411b363SPhilipp Reisner 
5274b411b363SPhilipp Reisner /*
5275b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5276b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5277b411b363SPhilipp Reisner  *
5278b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5279b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5280b411b363SPhilipp Reisner  *
5281b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5282b411b363SPhilipp Reisner  */
5283bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5284b411b363SPhilipp Reisner {
52859f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
52869f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5287b411b363SPhilipp Reisner 
5288bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5289bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
52909f5bdc33SAndreas Gruenbacher 	if (!p)
5291e8d17b01SAndreas Gruenbacher 		return -EIO;
5292b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5293b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5294b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
529520c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5296bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5297b411b363SPhilipp Reisner }
5298b411b363SPhilipp Reisner 
5299b411b363SPhilipp Reisner /*
5300b411b363SPhilipp Reisner  * return values:
5301b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5302b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5303b411b363SPhilipp Reisner  *  -1 peer talks different language,
5304b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5305b411b363SPhilipp Reisner  */
5306bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection)
5307b411b363SPhilipp Reisner {
5308bde89a9eSAndreas Gruenbacher 	/* ASSERT current == connection->receiver ... */
5309e658983aSAndreas Gruenbacher 	struct p_connection_features *p;
5310e658983aSAndreas Gruenbacher 	const int expect = sizeof(struct p_connection_features);
531177351055SPhilipp Reisner 	struct packet_info pi;
5312a5c31904SAndreas Gruenbacher 	int err;
5313b411b363SPhilipp Reisner 
5314bde89a9eSAndreas Gruenbacher 	err = drbd_send_features(connection);
5315e8d17b01SAndreas Gruenbacher 	if (err)
5316b411b363SPhilipp Reisner 		return 0;
5317b411b363SPhilipp Reisner 
5318bde89a9eSAndreas Gruenbacher 	err = drbd_recv_header(connection, &pi);
531969bc7bc3SAndreas Gruenbacher 	if (err)
5320b411b363SPhilipp Reisner 		return 0;
5321b411b363SPhilipp Reisner 
53226038178eSAndreas Gruenbacher 	if (pi.cmd != P_CONNECTION_FEATURES) {
53231ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
532477351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
5325b411b363SPhilipp Reisner 		return -1;
5326b411b363SPhilipp Reisner 	}
5327b411b363SPhilipp Reisner 
532877351055SPhilipp Reisner 	if (pi.size != expect) {
53291ec861ebSAndreas Gruenbacher 		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
533077351055SPhilipp Reisner 		     expect, pi.size);
5331b411b363SPhilipp Reisner 		return -1;
5332b411b363SPhilipp Reisner 	}
5333b411b363SPhilipp Reisner 
5334e658983aSAndreas Gruenbacher 	p = pi.data;
5335bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, p, expect);
5336a5c31904SAndreas Gruenbacher 	if (err)
5337b411b363SPhilipp Reisner 		return 0;
5338b411b363SPhilipp Reisner 
5339b411b363SPhilipp Reisner 	p->protocol_min = be32_to_cpu(p->protocol_min);
5340b411b363SPhilipp Reisner 	p->protocol_max = be32_to_cpu(p->protocol_max);
5341b411b363SPhilipp Reisner 	if (p->protocol_max == 0)
5342b411b363SPhilipp Reisner 		p->protocol_max = p->protocol_min;
5343b411b363SPhilipp Reisner 
5344b411b363SPhilipp Reisner 	if (PRO_VERSION_MAX < p->protocol_min ||
5345b411b363SPhilipp Reisner 	    PRO_VERSION_MIN > p->protocol_max)
5346b411b363SPhilipp Reisner 		goto incompat;
5347b411b363SPhilipp Reisner 
5348bde89a9eSAndreas Gruenbacher 	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
534920c68fdeSLars Ellenberg 	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
5350b411b363SPhilipp Reisner 
53511ec861ebSAndreas Gruenbacher 	drbd_info(connection, "Handshake successful: "
5352bde89a9eSAndreas Gruenbacher 	     "Agreed network protocol version %d\n", connection->agreed_pro_version);
5353b411b363SPhilipp Reisner 
5354f31e583aSLars Ellenberg 	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
53559104d31aSLars Ellenberg 		  connection->agreed_features,
53569104d31aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
53579104d31aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5358f31e583aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
5359f31e583aSLars Ellenberg 		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
53609104d31aSLars Ellenberg 		  connection->agreed_features ? "" : " none");
536192d94ae6SPhilipp Reisner 
5362b411b363SPhilipp Reisner 	return 1;
5363b411b363SPhilipp Reisner 
5364b411b363SPhilipp Reisner  incompat:
53651ec861ebSAndreas Gruenbacher 	drbd_err(connection, "incompatible DRBD dialects: "
5366b411b363SPhilipp Reisner 	    "I support %d-%d, peer supports %d-%d\n",
5367b411b363SPhilipp Reisner 	    PRO_VERSION_MIN, PRO_VERSION_MAX,
5368b411b363SPhilipp Reisner 	    p->protocol_min, p->protocol_max);
5369b411b363SPhilipp Reisner 	return -1;
5370b411b363SPhilipp Reisner }
5371b411b363SPhilipp Reisner 
5372b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac peer
 * authentication cannot work, so fail permanently (-1 means "don't retry",
 * matching the contract of the real drbd_do_auth()).
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Fixed typo in log message: "was build" -> "was built". */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5379b411b363SPhilipp Reisner #else
5380b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5381b10d96cbSJohannes Thoma 
5382b10d96cbSJohannes Thoma /* Return value:
5383b10d96cbSJohannes Thoma 	1 - auth succeeded,
5384b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5385b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5386b10d96cbSJohannes Thoma */
5387b10d96cbSJohannes Thoma 
/* CRAM-HMAC mutual challenge/response authentication using the shared secret. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret to the stack under RCU; net_conf may be
	 * replaced concurrently by a configuration change. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* A shash_desc is immediately followed by tfm-specific context,
	 * hence the descsize() added to the allocation. */
	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our challenge to the peer. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;	/* network problem: caller may retry */
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Receive the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;
		goto fail;
	}

	/* Sanity-check the peer-supplied length before allocating for it. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (!peers_ch) {
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer echoing our own challenge could mount a reflection attack;
	 * refuse it. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Compute and send our response to the peer's challenge. */
	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (!response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* Reuse the response buffer for the peer's answer; our own copy has
	 * already been sent. */
	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Compute what the peer's response should be and compare. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (!right_response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;	/* wrong response: permanent failure */

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	if (desc) {
		/* Zeroize the keyed hash state before freeing it. */
		shash_desc_zero(desc);
		kfree(desc);
	}

	return rv;
}
5563b411b363SPhilipp Reisner #endif
5564b411b363SPhilipp Reisner 
55658fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5566b411b363SPhilipp Reisner {
5567bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5568b411b363SPhilipp Reisner 	int h;
5569b411b363SPhilipp Reisner 
55701ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5571b411b363SPhilipp Reisner 
5572b411b363SPhilipp Reisner 	do {
5573bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5574b411b363SPhilipp Reisner 		if (h == 0) {
5575bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
557620ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5577b411b363SPhilipp Reisner 		}
5578b411b363SPhilipp Reisner 		if (h == -1) {
55791ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5580bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5581b411b363SPhilipp Reisner 		}
5582b411b363SPhilipp Reisner 	} while (h == 0);
5583b411b363SPhilipp Reisner 
5584c51a0ef3SLars Ellenberg 	if (h > 0) {
5585c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5586bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5587c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5588c51a0ef3SLars Ellenberg 	}
5589b411b363SPhilipp Reisner 
5590bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5591b411b363SPhilipp Reisner 
55921ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5593b411b363SPhilipp Reisner 	return 0;
5594b411b363SPhilipp Reisner }
5595b411b363SPhilipp Reisner 
5596b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5597b411b363SPhilipp Reisner 
5598bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5599b411b363SPhilipp Reisner {
5600e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5601b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5602b411b363SPhilipp Reisner 
5603b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5604bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5605b411b363SPhilipp Reisner 	} else {
5606bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
56071ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5608fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5609fc3b10a4SPhilipp Reisner 	}
5610bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5611e4f78edeSPhilipp Reisner 
56122735a594SAndreas Gruenbacher 	return 0;
5613fc3b10a4SPhilipp Reisner }
5614e4f78edeSPhilipp Reisner 
5615bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5616e4f78edeSPhilipp Reisner {
56179f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5618b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5619e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5620e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5621e4f78edeSPhilipp Reisner 
56229f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56239f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56242735a594SAndreas Gruenbacher 		return -EIO;
56259f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56261952e916SAndreas Gruenbacher 
5627bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
56280b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5629bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
56304d0fc3fdSPhilipp Reisner 	}
56314d0fc3fdSPhilipp Reisner 
5632e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5633b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5634e4f78edeSPhilipp Reisner 	} else {
5635b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5636d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5637b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5638b411b363SPhilipp Reisner 	}
5639b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5640b411b363SPhilipp Reisner 
56412735a594SAndreas Gruenbacher 	return 0;
5642b411b363SPhilipp Reisner }
5643b411b363SPhilipp Reisner 
/* Answer the peer's ping with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	int err;

	err = drbd_send_ping_ack(connection);
	return err;
}
5649b411b363SPhilipp Reisner 
5650bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5651b411b363SPhilipp Reisner {
5652b411b363SPhilipp Reisner 	/* restore idle timeout */
5653bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5654bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5655bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5656b411b363SPhilipp Reisner 
56572735a594SAndreas Gruenbacher 	return 0;
5658b411b363SPhilipp Reisner }
5659b411b363SPhilipp Reisner 
/*
 * The peer reports that a resync block turned out to be already in sync:
 * mark it in sync locally and account for it in the resync statistics.
 */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* This packet requires an agreed protocol version of at least 89. */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Touch the bitmap/disk state only while holding a local-disk ref. */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* blksize >> 9: convert bytes to 512-byte sectors. */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5689b411b363SPhilipp Reisner 
/*
 * Look up the request identified by (@id, @sector) in @root and apply the
 * request state machine event @what to it.
 *
 * Returns 0 on success, or -EIO if the request could not be found (and
 * @missing_ok did not permit that).
 */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	/* Both the lookup and the state transition happen under req_lock. */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* If the transition completed the master bio, finish it outside
	 * of req_lock. */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5711b411b363SPhilipp Reisner 
/*
 * Positive acknowledgement for a write (several packet flavors): translate
 * the packet type into a request state machine event and apply it to the
 * matching write request.
 */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Acks for resync traffic carry ID_SYNCER instead of a request id;
	 * they do not correspond to an application write request. */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	/* Map the packet type onto the request state machine event. */
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		/* The dispatch table only routes the above commands here. */
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5757b411b363SPhilipp Reisner 
/*
 * Negative acknowledgement: the peer failed to write the block.  Mark the
 * region out of sync and feed NEG_ACKED into the request state machine.
 */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Resync traffic carries ID_SYNCER; account the failed resync I/O. */
	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	/* missing_ok=true: the request may legitimately be gone already. */
	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5793b411b363SPhilipp Reisner 
5794bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5795b411b363SPhilipp Reisner {
57969f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5797b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5798e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5799b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5800b411b363SPhilipp Reisner 
58019f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58029f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58032735a594SAndreas Gruenbacher 		return -EIO;
58049f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
58051952e916SAndreas Gruenbacher 
580669a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
58077be8da07SAndreas Gruenbacher 
5808d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5809b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5810b411b363SPhilipp Reisner 
5811b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5812b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
58138554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5814b411b363SPhilipp Reisner }
5815b411b363SPhilipp Reisner 
/*
 * Negative reply to a resync data request: the peer could not deliver the
 * block (P_NEG_RS_DREPLY) or cancelled the request (P_RS_CANCEL).
 */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	/* Still touch resync bookkeeping even if the disk is failing. */
	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			break;
		case P_RS_CANCEL:
			/* Cancelled: nothing to record as failed. */
			break;
		default:
			/* The dispatch table only routes the above here. */
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5852b411b363SPhilipp Reisner 
/*
 * Barrier ack: release the acknowledged transfer log epoch, and for any
 * device in Ahead mode with no application I/O in flight, schedule the
 * transition back to sync source.
 */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* Iterate the peer devices under RCU protection. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* test_and_set_bit ensures the timer is armed only once. */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5876b411b363SPhilipp Reisner 
/*
 * Result of an online-verify request: record out-of-sync blocks, advance
 * the progress marks, and queue the finish work once the last block of
 * the verify run has been processed.
 */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	/* Without a local disk reference there is nothing left to update. */
	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	/* Last block processed: hand the finish-up to the sender work queue,
	 * or fall back to finishing inline if the allocation fails. */
	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5928b411b363SPhilipp Reisner 
5929bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
59300ced55a3SPhilipp Reisner {
59312735a594SAndreas Gruenbacher 	return 0;
59320ced55a3SPhilipp Reisner }
59330ced55a3SPhilipp Reisner 
5934668700b4SPhilipp Reisner struct meta_sock_cmd {
5935b411b363SPhilipp Reisner 	size_t pkt_size;
5936bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5937b411b363SPhilipp Reisner };
5938b411b363SPhilipp Reisner 
5939668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5940668700b4SPhilipp Reisner {
5941668700b4SPhilipp Reisner 	long t;
5942668700b4SPhilipp Reisner 	struct net_conf *nc;
5943668700b4SPhilipp Reisner 
5944668700b4SPhilipp Reisner 	rcu_read_lock();
5945668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5946668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5947668700b4SPhilipp Reisner 	rcu_read_unlock();
5948668700b4SPhilipp Reisner 
5949668700b4SPhilipp Reisner 	t *= HZ;
5950668700b4SPhilipp Reisner 	if (ping_timeout)
5951668700b4SPhilipp Reisner 		t /= 10;
5952668700b4SPhilipp Reisner 
5953668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5954668700b4SPhilipp Reisner }
5955668700b4SPhilipp Reisner 
5956668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5957668700b4SPhilipp Reisner {
5958668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5959668700b4SPhilipp Reisner }
5960668700b4SPhilipp Reisner 
5961668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5962668700b4SPhilipp Reisner {
5963668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5964668700b4SPhilipp Reisner }
5965668700b4SPhilipp Reisner 
5966668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5967e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5968e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5969b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5970b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5971b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5972d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5973b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5974b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5975b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5976b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5977b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5978b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5979b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
598002918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5981d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
59821952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
59831952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
5984b411b363SPhilipp Reisner };
5985b411b363SPhilipp Reisner 
59861c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5987b411b363SPhilipp Reisner {
5988bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5989668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
599077351055SPhilipp Reisner 	struct packet_info pi;
5991668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
5992257d0af6SPhilipp Reisner 	int rv;
5993bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
5994b411b363SPhilipp Reisner 	int received = 0;
5995bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
599652b061a4SAndreas Gruenbacher 	int expect   = header_size;
599744ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
5998b411b363SPhilipp Reisner 
59998b700983SPeter Zijlstra 	sched_set_fifo_low(current);
6000b411b363SPhilipp Reisner 
6001e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
600280822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
600344ed167dSPhilipp Reisner 
6004668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
600544ed167dSPhilipp Reisner 
6006bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
6007bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
60081ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
6009841ce241SAndreas Gruenbacher 				goto reconnect;
6010841ce241SAndreas Gruenbacher 			}
6011668700b4SPhilipp Reisner 			set_ping_timeout(connection);
601244ed167dSPhilipp Reisner 			ping_timeout_active = true;
6013b411b363SPhilipp Reisner 		}
6014b411b363SPhilipp Reisner 
6015668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
6016bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
6017b411b363SPhilipp Reisner 
6018b411b363SPhilipp Reisner 		/* Note:
6019b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
6020b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
6021b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
6022b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
6023b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
6024b411b363SPhilipp Reisner 		 * rv == expected: full header or command
6025b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
6026b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
6027b411b363SPhilipp Reisner 		 */
6028b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
6029b411b363SPhilipp Reisner 			received += rv;
6030b411b363SPhilipp Reisner 			buf	 += rv;
6031b411b363SPhilipp Reisner 		} else if (rv == 0) {
6032bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6033b66623e3SPhilipp Reisner 				long t;
6034b66623e3SPhilipp Reisner 				rcu_read_lock();
6035bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6036b66623e3SPhilipp Reisner 				rcu_read_unlock();
6037b66623e3SPhilipp Reisner 
6038bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
6039bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6040b66623e3SPhilipp Reisner 						       t);
6041599377acSPhilipp Reisner 				if (t)
6042599377acSPhilipp Reisner 					break;
6043599377acSPhilipp Reisner 			}
60441ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6045b411b363SPhilipp Reisner 			goto reconnect;
6046b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6047cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6048cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6049668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6050cb6518cbSLars Ellenberg 				continue;
6051f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60521ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6053b411b363SPhilipp Reisner 				goto reconnect;
6054b411b363SPhilipp Reisner 			}
6055bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6056b411b363SPhilipp Reisner 			continue;
6057b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6058668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6059668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6060668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6061668700b4SPhilipp Reisner 			flush_signals(current);
6062b411b363SPhilipp Reisner 			continue;
6063b411b363SPhilipp Reisner 		} else {
60641ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6065b411b363SPhilipp Reisner 			goto reconnect;
6066b411b363SPhilipp Reisner 		}
6067b411b363SPhilipp Reisner 
6068b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6069bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6070b411b363SPhilipp Reisner 				goto reconnect;
6071668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6072668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60731ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60742fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6075b411b363SPhilipp Reisner 				goto disconnect;
6076b411b363SPhilipp Reisner 			}
6077e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
607852b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
60791ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
608077351055SPhilipp Reisner 					pi.cmd, pi.size);
6081b411b363SPhilipp Reisner 				goto reconnect;
6082b411b363SPhilipp Reisner 			}
6083257d0af6SPhilipp Reisner 		}
6084b411b363SPhilipp Reisner 		if (received == expect) {
60852735a594SAndreas Gruenbacher 			bool err;
6086a4fbda8eSPhilipp Reisner 
6087bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
60882735a594SAndreas Gruenbacher 			if (err) {
6089d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6090b411b363SPhilipp Reisner 				goto reconnect;
60911952e916SAndreas Gruenbacher 			}
6092b411b363SPhilipp Reisner 
6093bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6094f36af18cSLars Ellenberg 
6095668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6096668700b4SPhilipp Reisner 				set_idle_timeout(connection);
609744ed167dSPhilipp Reisner 				ping_timeout_active = false;
609844ed167dSPhilipp Reisner 			}
6099b411b363SPhilipp Reisner 
6100bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6101b411b363SPhilipp Reisner 			received = 0;
610252b061a4SAndreas Gruenbacher 			expect	 = header_size;
6103b411b363SPhilipp Reisner 			cmd	 = NULL;
6104b411b363SPhilipp Reisner 		}
6105b411b363SPhilipp Reisner 	}
6106b411b363SPhilipp Reisner 
6107b411b363SPhilipp Reisner 	if (0) {
6108b411b363SPhilipp Reisner reconnect:
6109bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6110bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6111b411b363SPhilipp Reisner 	}
6112b411b363SPhilipp Reisner 	if (0) {
6113b411b363SPhilipp Reisner disconnect:
6114bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6115b411b363SPhilipp Reisner 	}
6116b411b363SPhilipp Reisner 
6117668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6118b411b363SPhilipp Reisner 
6119b411b363SPhilipp Reisner 	return 0;
6120b411b363SPhilipp Reisner }
6121668700b4SPhilipp Reisner 
6122668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6123668700b4SPhilipp Reisner {
6124668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6125668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6126668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6127668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6128668700b4SPhilipp Reisner 	struct net_conf *nc;
6129668700b4SPhilipp Reisner 	int tcp_cork, err;
6130668700b4SPhilipp Reisner 
6131668700b4SPhilipp Reisner 	rcu_read_lock();
6132668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6133668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6134668700b4SPhilipp Reisner 	rcu_read_unlock();
6135668700b4SPhilipp Reisner 
6136668700b4SPhilipp Reisner 	if (tcp_cork)
6137db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, true);
6138668700b4SPhilipp Reisner 
6139668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6140668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6141668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6142668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6143668700b4SPhilipp Reisner 
6144668700b4SPhilipp Reisner 	if (err) {
6145668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6146668700b4SPhilipp Reisner 		return;
6147668700b4SPhilipp Reisner 	}
6148668700b4SPhilipp Reisner 
6149668700b4SPhilipp Reisner 	if (tcp_cork)
6150db10538aSChristoph Hellwig 		tcp_sock_set_cork(connection->meta.socket->sk, false);
6151668700b4SPhilipp Reisner 
6152668700b4SPhilipp Reisner 	return;
6153668700b4SPhilipp Reisner }
6154