1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_receiver.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner  */
24b411b363SPhilipp Reisner 
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner 
28b411b363SPhilipp Reisner #include <asm/uaccess.h>
29b411b363SPhilipp Reisner #include <net/sock.h>
30b411b363SPhilipp Reisner 
31b411b363SPhilipp Reisner #include <linux/drbd.h>
32b411b363SPhilipp Reisner #include <linux/fs.h>
33b411b363SPhilipp Reisner #include <linux/file.h>
34b411b363SPhilipp Reisner #include <linux/in.h>
35b411b363SPhilipp Reisner #include <linux/mm.h>
36b411b363SPhilipp Reisner #include <linux/memcontrol.h>
37b411b363SPhilipp Reisner #include <linux/mm_inline.h>
38b411b363SPhilipp Reisner #include <linux/slab.h>
39b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
40b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
41b411b363SPhilipp Reisner #include <linux/unistd.h>
42b411b363SPhilipp Reisner #include <linux/vmalloc.h>
43b411b363SPhilipp Reisner #include <linux/random.h>
44b411b363SPhilipp Reisner #include <linux/string.h>
45b411b363SPhilipp Reisner #include <linux/scatterlist.h>
46b411b363SPhilipp Reisner #include "drbd_int.h"
47b411b363SPhilipp Reisner #include "drbd_req.h"
48b411b363SPhilipp Reisner 
49b411b363SPhilipp Reisner #include "drbd_vli.h"
50b411b363SPhilipp Reisner 
/* Decoded header of one packet received from the peer; filled in
 * before the per-command handler is dispatched. */
struct packet_info {
	enum drbd_packet cmd;	/* command code from the wire header */
	unsigned int size;	/* payload size in bytes */
	unsigned int vnr;	/* volume number -- presumably selects the mdev; confirm at dispatch site */
	void *data;		/* points into the receive buffer at the payload */
};
5777351055SPhilipp Reisner 
/* Possible outcomes of drbd_may_finish_epoch() (declared below). */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch could not be finished yet */
	FE_DESTROYED,	/* epoch object was destroyed */
	FE_RECYCLED,	/* epoch object was reused */
};
63b411b363SPhilipp Reisner 
646038178eSAndreas Gruenbacher static int drbd_do_features(struct drbd_tconn *tconn);
6513e6037dSPhilipp Reisner static int drbd_do_auth(struct drbd_tconn *tconn);
66c141ebdaSPhilipp Reisner static int drbd_disconnected(struct drbd_conf *mdev);
67b411b363SPhilipp Reisner 
681e9dd291SPhilipp Reisner static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
6999920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
70b411b363SPhilipp Reisner 
71b411b363SPhilipp Reisner 
72b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
73b411b363SPhilipp Reisner 
7445bb912bSLars Ellenberg /*
7545bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
7645bb912bSLars Ellenberg  * page->private being our "next" pointer.
7745bb912bSLars Ellenberg  */
7845bb912bSLars Ellenberg 
7945bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
8045bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
8145bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
8245bb912bSLars Ellenberg  */
8345bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
8445bb912bSLars Ellenberg {
8545bb912bSLars Ellenberg 	struct page *page;
8645bb912bSLars Ellenberg 	struct page *tmp;
8745bb912bSLars Ellenberg 
8845bb912bSLars Ellenberg 	BUG_ON(!n);
8945bb912bSLars Ellenberg 	BUG_ON(!head);
9045bb912bSLars Ellenberg 
9145bb912bSLars Ellenberg 	page = *head;
9223ce4227SPhilipp Reisner 
9323ce4227SPhilipp Reisner 	if (!page)
9423ce4227SPhilipp Reisner 		return NULL;
9523ce4227SPhilipp Reisner 
9645bb912bSLars Ellenberg 	while (page) {
9745bb912bSLars Ellenberg 		tmp = page_chain_next(page);
9845bb912bSLars Ellenberg 		if (--n == 0)
9945bb912bSLars Ellenberg 			break; /* found sufficient pages */
10045bb912bSLars Ellenberg 		if (tmp == NULL)
10145bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
10245bb912bSLars Ellenberg 			return NULL;
10345bb912bSLars Ellenberg 		page = tmp;
10445bb912bSLars Ellenberg 	}
10545bb912bSLars Ellenberg 
10645bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10745bb912bSLars Ellenberg 	set_page_private(page, 0);
10845bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
10945bb912bSLars Ellenberg 	page = *head;
11045bb912bSLars Ellenberg 	*head = tmp;
11145bb912bSLars Ellenberg 	return page;
11245bb912bSLars Ellenberg }
11345bb912bSLars Ellenberg 
11445bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
11545bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
11645bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11745bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11845bb912bSLars Ellenberg {
11945bb912bSLars Ellenberg 	struct page *tmp;
12045bb912bSLars Ellenberg 	int i = 1;
12145bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page)))
12245bb912bSLars Ellenberg 		++i, page = tmp;
12345bb912bSLars Ellenberg 	if (len)
12445bb912bSLars Ellenberg 		*len = i;
12545bb912bSLars Ellenberg 	return page;
12645bb912bSLars Ellenberg }
12745bb912bSLars Ellenberg 
12845bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12945bb912bSLars Ellenberg {
13045bb912bSLars Ellenberg 	struct page *tmp;
13145bb912bSLars Ellenberg 	int i = 0;
13245bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
13345bb912bSLars Ellenberg 		put_page(page);
13445bb912bSLars Ellenberg 		++i;
13545bb912bSLars Ellenberg 	}
13645bb912bSLars Ellenberg 	return i;
13745bb912bSLars Ellenberg }
13845bb912bSLars Ellenberg 
13945bb912bSLars Ellenberg static void page_chain_add(struct page **head,
14045bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
14145bb912bSLars Ellenberg {
14245bb912bSLars Ellenberg #if 1
14345bb912bSLars Ellenberg 	struct page *tmp;
14445bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14545bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14645bb912bSLars Ellenberg #endif
14745bb912bSLars Ellenberg 
14845bb912bSLars Ellenberg 	/* add chain to head */
14945bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
15045bb912bSLars Ellenberg 	*head = chain_first;
15145bb912bSLars Ellenberg }
15245bb912bSLars Ellenberg 
/* Try to get @number pages, first from our own pool, then from the
 * kernel.  Returns a chain linked via page->private, or NULL if not all
 * @number pages could be obtained -- in that case nothing is handed out
 * (partially allocated pages go back to the pool). */
static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* prepend the new page to the chain being built */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		/* give the partial chain back to the global pool */
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
198b411b363SPhilipp Reisner 
199a990be46SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
200a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
201b411b363SPhilipp Reisner {
202db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
203b411b363SPhilipp Reisner 	struct list_head *le, *tle;
204b411b363SPhilipp Reisner 
205b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
206b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
207b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
208b411b363SPhilipp Reisner 	   stop to examine the list... */
209b411b363SPhilipp Reisner 
210b411b363SPhilipp Reisner 	list_for_each_safe(le, tle, &mdev->net_ee) {
211db830c46SAndreas Gruenbacher 		peer_req = list_entry(le, struct drbd_peer_request, w.list);
212045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
213b411b363SPhilipp Reisner 			break;
214b411b363SPhilipp Reisner 		list_move(le, to_be_freed);
215b411b363SPhilipp Reisner 	}
216b411b363SPhilipp Reisner }
217b411b363SPhilipp Reisner 
/* Collect, under the req_lock, all finished net_ee entries, then free
 * them outside the lock. */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* free without holding the lock */
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);
}
230b411b363SPhilipp Reisner 
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.  Returns NULL when
 * @retry is not set and pages were not immediately available, or when
 * the retry loop was interrupted by a signal.
 */
struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
			      bool retry)
{
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;	/* effectively unlimited if not configured */
	rcu_read_unlock();

	if (atomic_read(&mdev->pp_in_use) < mxb)
		page = __drbd_alloc_pages(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* return finished net_ee pages, they may make the retry succeed */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* sleep until woken via drbd_pp_wait (see drbd_free_pages()) */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages we are about to hand out */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
288b411b363SPhilipp Reisner 
289c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
29145bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
29245bb912bSLars Ellenberg  * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
{
	/* pick the accounting counter the pages were charged against */
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* If the pool is already well filled, give the pages back to the
	 * system; otherwise link the chain back into the global pool. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* someone may be sleeping in drbd_alloc_pages() waiting for pages */
	wake_up(&drbd_pp_wait);
}
317b411b363SPhilipp Reisner 
318b411b363SPhilipp Reisner /*
319b411b363SPhilipp Reisner You need to hold the req_lock:
320b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
321b411b363SPhilipp Reisner 
322b411b363SPhilipp Reisner You must not have the req_lock:
3233967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3240db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3257721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
326b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
327a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
328b411b363SPhilipp Reisner  drbd_clear_done_ee()
329b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
330b411b363SPhilipp Reisner */
331b411b363SPhilipp Reisner 
/* Allocate a peer request object plus, if @data_size != 0, a page chain
 * big enough for @data_size bytes.  Returns NULL on fault injection,
 * allocation failure, or when drbd_alloc_pages() gave up (e.g. signal).
 * @id is stored opaquely in ->block_id. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		/* only retry/block for pages if the caller may sleep */
		page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	/* initialize the interval describing the affected disk range */
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
379b411b363SPhilipp Reisner 
/* Free a peer request and everything attached to it.  @is_net selects
 * which accounting counter the pages are subtracted from (see
 * drbd_free_pages()). */
void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(mdev, peer_req->pages, is_net);
	/* must not be freed with bios in flight or while still linked
	 * into an interval tree */
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
390b411b363SPhilipp Reisner 
3917721f567SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
392b411b363SPhilipp Reisner {
393b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
394db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
395b411b363SPhilipp Reisner 	int count = 0;
396435f0740SLars Ellenberg 	int is_net = list == &mdev->net_ee;
397b411b363SPhilipp Reisner 
39887eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
399b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
40087eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
401b411b363SPhilipp Reisner 
402db830c46SAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
4033967deb1SAndreas Gruenbacher 		__drbd_free_peer_req(mdev, peer_req, is_net);
404b411b363SPhilipp Reisner 		count++;
405b411b363SPhilipp Reisner 	}
406b411b363SPhilipp Reisner 	return count;
407b411b363SPhilipp Reisner }
408b411b363SPhilipp Reisner 
/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 *
 * Runs the completion callback of every entry on done_ee and frees the
 * entries; also reclaims finished net_ee requests.  Returns 0, or the
 * first non-zero value returned by a callback.
 */
static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* keep only the first error */
		drbd_free_peer_req(mdev, peer_req);
	}
	/* wake waiters in _drbd_wait_ee_list_empty() */
	wake_up(&mdev->ee_wait);

	return err;
}
444b411b363SPhilipp Reisner 
/* Wait until @head is empty.  Caller must hold the req_lock; it is
 * dropped while sleeping and re-acquired before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
460b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes/releases the req_lock
 * itself; must be called without the req_lock held. */
static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				    struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
468b411b363SPhilipp Reisner 
/* Receive up to @size bytes from @sock into @buf.  With @flags == 0,
 * MSG_WAITALL | MSG_NOSIGNAL is used, i.e. block until the full amount
 * arrived.  Returns the number of bytes received, or a negative error
 * code from sock_recvmsg(). */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* temporarily widen the address limit so sock_recvmsg() accepts
	 * our kernel-space buffer */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
490b411b363SPhilipp Reisner 
/* Receive @size bytes on the connection's data socket.  Logs unexpected
 * failures and forces the connection state to C_BROKEN_PIPE on a short
 * or failed read.  Returns bytes received or a negative error code. */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(tconn->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			conn_info(tconn, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* rv == 0: orderly shutdown by the peer */
		if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			/* we sent the disconnect ourselves: give the state
			 * machine up to ping_timeo to leave
			 * C_WF_REPORT_PARAMS before declaring breakage */
			t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		conn_info(tconn, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
523b411b363SPhilipp Reisner 
524c6967746SAndreas Gruenbacher static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
525c6967746SAndreas Gruenbacher {
526c6967746SAndreas Gruenbacher 	int err;
527c6967746SAndreas Gruenbacher 
528c6967746SAndreas Gruenbacher 	err = drbd_recv(tconn, buf, size);
529c6967746SAndreas Gruenbacher 	if (err != size) {
530c6967746SAndreas Gruenbacher 		if (err >= 0)
531c6967746SAndreas Gruenbacher 			err = -EIO;
532c6967746SAndreas Gruenbacher 	} else
533c6967746SAndreas Gruenbacher 		err = 0;
534c6967746SAndreas Gruenbacher 	return err;
535c6967746SAndreas Gruenbacher }
536c6967746SAndreas Gruenbacher 
537a5c31904SAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
538a5c31904SAndreas Gruenbacher {
539a5c31904SAndreas Gruenbacher 	int err;
540a5c31904SAndreas Gruenbacher 
541a5c31904SAndreas Gruenbacher 	err = drbd_recv_all(tconn, buf, size);
542a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
543a5c31904SAndreas Gruenbacher 		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
544a5c31904SAndreas Gruenbacher 	return err;
545a5c31904SAndreas Gruenbacher }
546a5c31904SAndreas Gruenbacher 
5475dbf1673SLars Ellenberg /* quoting tcp(7):
5485dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5495dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5505dbf1673SLars Ellenberg  * This is our wrapper to do so.
5515dbf1673SLars Ellenberg  */
5525dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5535dbf1673SLars Ellenberg 		unsigned int rcv)
5545dbf1673SLars Ellenberg {
5555dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5565dbf1673SLars Ellenberg 	if (snd) {
5575dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5585dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5595dbf1673SLars Ellenberg 	}
5605dbf1673SLars Ellenberg 	if (rcv) {
5615dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5625dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5635dbf1673SLars Ellenberg 	}
5645dbf1673SLars Ellenberg }
5655dbf1673SLars Ellenberg 
/* Actively try to establish one TCP connection to the configured peer.
 *
 * Binds to the locally configured address (with port 0, so the kernel
 * picks a free source port) — needed for multihomed hosts and for lo:
 * interfaces — then connects to the peer address.  Buffer sizes and the
 * connect timeout are snapshotted from the current net_conf under RCU.
 *
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, peer not yet reachable, pending signal) leave the connection
 * state alone so the caller keeps retrying; any unexpected error forces
 * the connection to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;	/* label of the step that failed, for the error message */
	struct socket *sock;
	struct sockaddr_in6 src_in6;	/* big enough for both AF_INET and AF_INET6 */
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		/* no network config (any more): nothing to connect to */
		rcu_read_unlock();
		return NULL;
	}
	/* copy what we need; nc must not be used outside the RCU section */
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &tconn->my_addr, my_addr_len);

	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* FIX: bound the copy by the destination buffer (peer_in6), not
	 * src_in6.  Both are sockaddr_in6, so the old expression happened
	 * to yield the same value, but the bound must match the buffer
	 * that is actually written. */
	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
653b411b363SPhilipp Reisner 
/* Context shared between the listen socket's redirected sk_state_change
 * callback (drbd_incoming_connection()) and the thread waiting in
 * drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_tconn *tconn;	/* connection this listener belongs to */
	struct socket *s_listen;	/* the listening socket itself */
	struct completion door_bell;	/* completed for each incoming connection */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored by unregister_state_change() */

};
6617a426fd8SPhilipp Reisner 
/* sk_state_change hook installed on the listen socket by
 * prepare_listen_socket().  Rings the door_bell when a connection reaches
 * TCP_ESTABLISHED, then chains to the original callback so the socket
 * core's own state handling keeps working.
 * NOTE(review): invoked from the network stack's state-change path —
 * confirm locking/softirq context before adding anything blocking here. */
static void drbd_incoming_connection(struct sock *sk)
{
	/* sk_user_data was pointed at the accept_wait_data in prepare_listen_socket() */
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);	/* wake drbd_wait_for_connect() */
	state_change(sk);
}
6727a426fd8SPhilipp Reisner 
/* Create, configure, and start listening on a socket bound to
 * tconn->my_addr, and redirect its sk_state_change callback to @ad so
 * incoming connections ring ad->door_bell (see drbd_incoming_connection()).
 *
 * Returns 0 on success (ad->s_listen set), -EIO on any failure.
 * Unexpected errors force the connection state to C_DISCONNECTING. */
static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;	/* big enough for both AF_INET and AF_INET6 */
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;	/* label of the step that failed, for the error message */

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		/* no network config (any more): nothing to listen for */
		rcu_read_unlock();
		return -EIO;
	}
	/* copy what we need; nc must not be used outside the RCU section */
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &tconn->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Hook up the callback *before* listen() so no early incoming
	 * connection can be missed; the original callback is saved in @ad
	 * and restored later by unregister_state_change(). */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	/* NOTE(review): if listen() fails after the hookup above,
	 * ad->s_listen is left pointing at the socket released here; the
	 * caller bails out on our error return without touching it, but
	 * clearing ad->s_listen would be more defensive — confirm. */
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* transient errors are quietly retried by the caller */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7351f3e509bSPhilipp Reisner 
/* Undo the callback redirection done in prepare_listen_socket(): restore
 * the saved sk_state_change handler and clear sk_user_data, under
 * sk_callback_lock to serialize against drbd_incoming_connection(). */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
743715306f6SAndreas Gruenbacher 
/* Wait (connect_int with ~28.5% random jitter, to break connect races
 * between the two peers) for an incoming connection on the prepared
 * listen socket, then accept it.
 *
 * Returns the established socket, or NULL on timeout, signal, missing
 * config, or accept failure.  Unexpected accept errors force the
 * connection state to C_DISCONNECTING. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		/* config vanished: no point in waiting */
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	/* drbd_incoming_connection() completes door_bell once per
	 * established incoming connection */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)	/* 0 = timed out, <0 = interrupted */
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		/* transient errors are quietly retried by the caller */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "accept failed, err = %d\n", err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited our redirected sk_state_change;
	 * put the stock callback back before handing the socket out */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
779b411b363SPhilipp Reisner 
780e658983aSAndreas Gruenbacher static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
781b411b363SPhilipp Reisner 
7829f5bdc33SAndreas Gruenbacher static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
7839f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
7849f5bdc33SAndreas Gruenbacher {
7859f5bdc33SAndreas Gruenbacher 	if (!conn_prepare_command(tconn, sock))
7869f5bdc33SAndreas Gruenbacher 		return -EIO;
787e658983aSAndreas Gruenbacher 	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
788b411b363SPhilipp Reisner }
789b411b363SPhilipp Reisner 
7909f5bdc33SAndreas Gruenbacher static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
791b411b363SPhilipp Reisner {
7929f5bdc33SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(tconn);
7939f5bdc33SAndreas Gruenbacher 	struct packet_info pi;
7949f5bdc33SAndreas Gruenbacher 	int err;
795b411b363SPhilipp Reisner 
7969f5bdc33SAndreas Gruenbacher 	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
7979f5bdc33SAndreas Gruenbacher 	if (err != header_size) {
7989f5bdc33SAndreas Gruenbacher 		if (err >= 0)
7999f5bdc33SAndreas Gruenbacher 			err = -EIO;
8009f5bdc33SAndreas Gruenbacher 		return err;
8019f5bdc33SAndreas Gruenbacher 	}
8029f5bdc33SAndreas Gruenbacher 	err = decode_header(tconn, tconn->data.rbuf, &pi);
8039f5bdc33SAndreas Gruenbacher 	if (err)
8049f5bdc33SAndreas Gruenbacher 		return err;
8059f5bdc33SAndreas Gruenbacher 	return pi.cmd;
806b411b363SPhilipp Reisner }
807b411b363SPhilipp Reisner 
808b411b363SPhilipp Reisner /**
809b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
810b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
811b411b363SPhilipp Reisner  */
812dbd9eea0SPhilipp Reisner static int drbd_socket_okay(struct socket **sock)
813b411b363SPhilipp Reisner {
814b411b363SPhilipp Reisner 	int rr;
815b411b363SPhilipp Reisner 	char tb[4];
816b411b363SPhilipp Reisner 
817b411b363SPhilipp Reisner 	if (!*sock)
81881e84650SAndreas Gruenbacher 		return false;
819b411b363SPhilipp Reisner 
820dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
821b411b363SPhilipp Reisner 
822b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
82381e84650SAndreas Gruenbacher 		return true;
824b411b363SPhilipp Reisner 	} else {
825b411b363SPhilipp Reisner 		sock_release(*sock);
826b411b363SPhilipp Reisner 		*sock = NULL;
82781e84650SAndreas Gruenbacher 		return false;
828b411b363SPhilipp Reisner 	}
829b411b363SPhilipp Reisner }
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.
   Performs the per-volume part of the handshake (sync parameters, sizes,
   uuids, current state) and resets per-connection bookkeeping.
   Returns 0 on success or the first send error. */
int drbd_connected(struct drbd_conf *mdev)
{
	int err;

	/* restart the per-peer packet sequence numbering for this connection */
	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	/* peers older than protocol 100 serialize state changes on the
	 * per-connection mutex; newer ones per volume */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	/* handshake sends, short-circuited at the first failure; the
	 * flag/timer cleanup below runs regardless */
	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_current_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
855b411b363SPhilipp Reisner 
/*
 * Establish both sockets (data and meta) of a DRBD connection, racing
 * active connects against a passive listener until each side holds one
 * pair, then run the feature/auth/protocol handshake and kick off the
 * per-volume handshakes.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_tconn *tconn)
{
	struct drbd_socket sock, msock;	/* data socket and meta socket under construction */
	struct drbd_conf *mdev;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.tconn = tconn,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &tconn->flags);
	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = tconn->data.sbuf;
	sock.rbuf = tconn->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = tconn->meta.sbuf;
	msock.rbuf = tconn->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	tconn->agreed_pro_version = 80;

	if (prepare_listen_socket(tconn, &ad))
		return 0;

	/* Loop until we hold both sockets, mixing outgoing connects with
	 * accepting incoming ones; both peers do the same, so "crossed"
	 * connections are expected and resolved below. */
	do {
		struct socket *s;

		s = drbd_try_connect(tconn);
		if (s) {
			/* first outgoing socket becomes data, second meta */
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(tconn, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
				msock.socket = s;
				send_first_packet(tconn, &msock, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			/* give the peer a moment, then verify both sockets
			 * survived before declaring success */
			rcu_read_lock();
			nc = rcu_dereference(tconn->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn, &ad);
		if (s) {
			int fp = receive_first_packet(tconn, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected simultaneously:
					 * keep the accepted one */
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				/* the accepting side arbitrates write conflicts */
				set_bit(RESOLVE_CONFLICTS, &tconn->flags);
				if (msock.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* randomly retry the accept to break
				 * symmetric connect races */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* both sockets established; the listener is no longer needed */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	/* sends happen from writeback context: must not recurse into I/O */
	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	tconn->data.socket = sock.socket;
	tconn->meta.socket = msock.socket;
	tconn->last_received = jiffies;

	/* negotiate the protocol version with the peer */
	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* handshake done: switch the data socket to its operational timeouts */
	tconn->data.socket->sk->sk_sndtimeo = timeout;
	tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	set_bit(STATE_SENT, &tconn->flags);

	/* run the per-volume handshake for every minor of this connection;
	 * the kref keeps mdev alive while RCU is dropped around the
	 * (possibly blocking) drbd_connected() call */
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		kref_get(&mdev->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &mdev->flags);
		else
			clear_bit(DISCARD_MY_DATA, &mdev->flags);

		drbd_connected(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &tconn->flags);
		return 0;
	}

	drbd_thread_start(&tconn->asender);

	mutex_lock(&tconn->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	tconn->net_conf->discard_my_data = 0;
	mutex_unlock(&tconn->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1080b411b363SPhilipp Reisner 
/* Decode a raw on-wire packet header at @header into @pi.
 * The expected header layout is determined by the agreed protocol
 * version (via drbd_header_size()) and cross-checked against the magic
 * value at the start of the buffer.
 * On success pi->data points just past the header inside the same
 * buffer.  Returns 0, or -EINVAL on a malformed/mismatched header. */
static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(tconn);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		/* protocol >= 100: header carries a volume number */
		struct p_header100 *h = header;
		if (h->pad != 0) {
			conn_err(tconn, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		/* protocol 95..99: 32-bit length, no volume number */
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		/* protocol <= 94: 16-bit length, no volume number */
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 tconn->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}
1116b411b363SPhilipp Reisner 
11179ba7aa00SPhilipp Reisner static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
1118257d0af6SPhilipp Reisner {
1119e658983aSAndreas Gruenbacher 	void *buffer = tconn->data.rbuf;
112069bc7bc3SAndreas Gruenbacher 	int err;
1121257d0af6SPhilipp Reisner 
1122e658983aSAndreas Gruenbacher 	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
1123a5c31904SAndreas Gruenbacher 	if (err)
112469bc7bc3SAndreas Gruenbacher 		return err;
1125257d0af6SPhilipp Reisner 
1126e658983aSAndreas Gruenbacher 	err = decode_header(tconn, buffer, pi);
11279ba7aa00SPhilipp Reisner 	tconn->last_received = jiffies;
1128b411b363SPhilipp Reisner 
112969bc7bc3SAndreas Gruenbacher 	return err;
1130b411b363SPhilipp Reisner }
1131b411b363SPhilipp Reisner 
/* Issue a cache flush to every volume's backing device, if the
 * connection's write ordering policy requires it (WO_bdev_flush or
 * stricter).  A failed flush demotes the policy to WO_drain_io and
 * stops flushing the remaining volumes. */
static void drbd_flush(struct drbd_tconn *tconn)
{
	int rv;
	struct drbd_conf *mdev;
	int vnr;

	if (tconn->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
			if (!get_ldev(mdev))
				continue;	/* no local disk attached: nothing to flush */
			/* pin the device so we can drop RCU around the
			 * blocking flush below */
			kref_get(&mdev->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				dev_info(DEV, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(tconn, WO_drain_io);
			}
			put_ldev(mdev);
			kref_put(&mdev->kref, &drbd_minor_destroy);

			rcu_read_lock();
			if (rv)
				break;	/* policy just demoted: flushing is pointless now */
		}
		rcu_read_unlock();
	}
}
1165b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @tconn:	DRBD connection (owns the epoch list and epoch_lock).
 * @epoch:	Epoch object.
 * @ev:		Epoch event (EV_PUT, EV_GOT_BARRIER_NR or EV_BECAME_LAST,
 *		optionally OR'ed with EV_CLEANUP).
 *
 * Returns FE_STILL_LIVE if @epoch is left alive, FE_RECYCLED if the
 * current epoch was reset for reuse, or FE_DESTROYED if at least one
 * epoch was unlinked and freed.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&tconn->epoch_lock);
	/* Finishing one epoch may make its successor finishable as well
	 * (EV_BECAME_LAST), so we loop until no further epoch completes. */
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* An epoch can be finished once it saw at least one write,
		 * none of those writes is still in flight, and either the
		 * barrier number has arrived or we are cleaning up anyway. */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* Sending the barrier ack does network I/O;
				 * drop the spinlock around it. */
				spin_unlock(&tconn->epoch_lock);
				drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
				spin_lock(&tconn->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->tconn);
#endif

			if (tconn->current_epoch != epoch) {
				/* A finished non-current epoch is unlinked and
				 * freed; its successor then "became last". */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				tconn->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* The current epoch is merely reset for reuse. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&tconn->epoch_lock);

	return rv;
}
1241b411b363SPhilipp Reisner 
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @tconn:	DRBD connection.
 * @wo:		Write ordering method to try.
 *
 * The effective method can only get weaker, never stronger: the result is
 * the minimum of the current method and @wo, further reduced by the
 * per-volume disk_conf flags (disk_flushes, disk_drain) of every attached
 * volume on this connection.
 */
void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
{
	struct disk_conf *dc;
	struct drbd_conf *mdev;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = tconn->write_ordering;
	/* Never upgrade beyond the currently configured method. */
	wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		/* D_ATTACHING is enough: we only read the disk_conf. */
		if (!get_ldev_if_state(mdev, D_ATTACHING))
			continue;
		dc = rcu_dereference(mdev->ldev->disk_conf);

		/* Each volume may veto flushes and/or draining, stepping
		 * the method down for the whole connection. */
		if (wo == WO_bdev_flush && !dc->disk_flushes)
			wo = WO_drain_io;
		if (wo == WO_drain_io && !dc->disk_drain)
			wo = WO_none;
		put_ldev(mdev);
	}
	rcu_read_unlock();
	tconn->write_ordering = wo;
	if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
		conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
}
1278b411b363SPhilipp Reisner 
1279b411b363SPhilipp Reisner /**
1280fbe29decSAndreas Gruenbacher  * drbd_submit_peer_request()
128145bb912bSLars Ellenberg  * @mdev:	DRBD device.
1282db830c46SAndreas Gruenbacher  * @peer_req:	peer request
128345bb912bSLars Ellenberg  * @rw:		flag field, see bio->bi_rw
128410f6d992SLars Ellenberg  *
128510f6d992SLars Ellenberg  * May spread the pages to multiple bios,
128610f6d992SLars Ellenberg  * depending on bio_add_page restrictions.
128710f6d992SLars Ellenberg  *
128810f6d992SLars Ellenberg  * Returns 0 if all bios have been submitted,
128910f6d992SLars Ellenberg  * -ENOMEM if we could not allocate enough bios,
129010f6d992SLars Ellenberg  * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
129110f6d992SLars Ellenberg  *  single page to an empty bio (which should never happen and likely indicates
129210f6d992SLars Ellenberg  *  that the lower level IO stack is in some way broken). This has been observed
129310f6d992SLars Ellenberg  *  on certain Xen deployments.
129445bb912bSLars Ellenberg  */
129545bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
1296fbe29decSAndreas Gruenbacher int drbd_submit_peer_request(struct drbd_conf *mdev,
1297fbe29decSAndreas Gruenbacher 			     struct drbd_peer_request *peer_req,
129845bb912bSLars Ellenberg 			     const unsigned rw, const int fault_type)
129945bb912bSLars Ellenberg {
130045bb912bSLars Ellenberg 	struct bio *bios = NULL;
130145bb912bSLars Ellenberg 	struct bio *bio;
1302db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
1303db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1304db830c46SAndreas Gruenbacher 	unsigned ds = peer_req->i.size;
130545bb912bSLars Ellenberg 	unsigned n_bios = 0;
130645bb912bSLars Ellenberg 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
130710f6d992SLars Ellenberg 	int err = -ENOMEM;
130845bb912bSLars Ellenberg 
130945bb912bSLars Ellenberg 	/* In most cases, we will only need one bio.  But in case the lower
131045bb912bSLars Ellenberg 	 * level restrictions happen to be different at this offset on this
131145bb912bSLars Ellenberg 	 * side than those of the sending peer, we may need to submit the
13129476f39dSLars Ellenberg 	 * request in more than one bio.
13139476f39dSLars Ellenberg 	 *
13149476f39dSLars Ellenberg 	 * Plain bio_alloc is good enough here, this is no DRBD internally
13159476f39dSLars Ellenberg 	 * generated bio, but a bio allocated on behalf of the peer.
13169476f39dSLars Ellenberg 	 */
131745bb912bSLars Ellenberg next_bio:
131845bb912bSLars Ellenberg 	bio = bio_alloc(GFP_NOIO, nr_pages);
131945bb912bSLars Ellenberg 	if (!bio) {
132045bb912bSLars Ellenberg 		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
132145bb912bSLars Ellenberg 		goto fail;
132245bb912bSLars Ellenberg 	}
1323db830c46SAndreas Gruenbacher 	/* > peer_req->i.sector, unless this is the first bio */
132445bb912bSLars Ellenberg 	bio->bi_sector = sector;
132545bb912bSLars Ellenberg 	bio->bi_bdev = mdev->ldev->backing_bdev;
132645bb912bSLars Ellenberg 	bio->bi_rw = rw;
1327db830c46SAndreas Gruenbacher 	bio->bi_private = peer_req;
1328fcefa62eSAndreas Gruenbacher 	bio->bi_end_io = drbd_peer_request_endio;
132945bb912bSLars Ellenberg 
133045bb912bSLars Ellenberg 	bio->bi_next = bios;
133145bb912bSLars Ellenberg 	bios = bio;
133245bb912bSLars Ellenberg 	++n_bios;
133345bb912bSLars Ellenberg 
133445bb912bSLars Ellenberg 	page_chain_for_each(page) {
133545bb912bSLars Ellenberg 		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
133645bb912bSLars Ellenberg 		if (!bio_add_page(bio, page, len, 0)) {
133710f6d992SLars Ellenberg 			/* A single page must always be possible!
133810f6d992SLars Ellenberg 			 * But in case it fails anyways,
133910f6d992SLars Ellenberg 			 * we deal with it, and complain (below). */
134010f6d992SLars Ellenberg 			if (bio->bi_vcnt == 0) {
134110f6d992SLars Ellenberg 				dev_err(DEV,
134210f6d992SLars Ellenberg 					"bio_add_page failed for len=%u, "
134310f6d992SLars Ellenberg 					"bi_vcnt=0 (bi_sector=%llu)\n",
134410f6d992SLars Ellenberg 					len, (unsigned long long)bio->bi_sector);
134510f6d992SLars Ellenberg 				err = -ENOSPC;
134610f6d992SLars Ellenberg 				goto fail;
134710f6d992SLars Ellenberg 			}
134845bb912bSLars Ellenberg 			goto next_bio;
134945bb912bSLars Ellenberg 		}
135045bb912bSLars Ellenberg 		ds -= len;
135145bb912bSLars Ellenberg 		sector += len >> 9;
135245bb912bSLars Ellenberg 		--nr_pages;
135345bb912bSLars Ellenberg 	}
135445bb912bSLars Ellenberg 	D_ASSERT(page == NULL);
135545bb912bSLars Ellenberg 	D_ASSERT(ds == 0);
135645bb912bSLars Ellenberg 
1357db830c46SAndreas Gruenbacher 	atomic_set(&peer_req->pending_bios, n_bios);
135845bb912bSLars Ellenberg 	do {
135945bb912bSLars Ellenberg 		bio = bios;
136045bb912bSLars Ellenberg 		bios = bios->bi_next;
136145bb912bSLars Ellenberg 		bio->bi_next = NULL;
136245bb912bSLars Ellenberg 
136345bb912bSLars Ellenberg 		drbd_generic_make_request(mdev, fault_type, bio);
136445bb912bSLars Ellenberg 	} while (bios);
136545bb912bSLars Ellenberg 	return 0;
136645bb912bSLars Ellenberg 
136745bb912bSLars Ellenberg fail:
136845bb912bSLars Ellenberg 	while (bios) {
136945bb912bSLars Ellenberg 		bio = bios;
137045bb912bSLars Ellenberg 		bios = bios->bi_next;
137145bb912bSLars Ellenberg 		bio_put(bio);
137245bb912bSLars Ellenberg 	}
137310f6d992SLars Ellenberg 	return err;
137445bb912bSLars Ellenberg }
137545bb912bSLars Ellenberg 
137653840641SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
1377db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
137853840641SAndreas Gruenbacher {
1379db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
138053840641SAndreas Gruenbacher 
138153840641SAndreas Gruenbacher 	drbd_remove_interval(&mdev->write_requests, i);
138253840641SAndreas Gruenbacher 	drbd_clear_interval(i);
138353840641SAndreas Gruenbacher 
13846c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
138553840641SAndreas Gruenbacher 	if (i->waiting)
138653840641SAndreas Gruenbacher 		wake_up(&mdev->misc_wait);
138753840641SAndreas Gruenbacher }
138853840641SAndreas Gruenbacher 
/* Wait until the active_ee list of every volume on @tconn has drained.
 * May sleep per volume in drbd_wait_ee_list_empty(). */
void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		/* Pin the device with a kref BEFORE leaving the RCU read
		 * side, so it cannot be destroyed while we sleep below. */
		kref_get(&mdev->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
140477fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: stamp the barrier number onto the
 * current epoch, try to finish it, and (depending on the write ordering
 * method) allocate and install a fresh epoch.
 * Returns 0 on success, -EIO on an unknown write ordering method. */
static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	tconn->current_epoch->barrier_nr = p->barrier;
	tconn->current_epoch->tconn = tconn;
	rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (tconn->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* Drain all in-flight writes (and flush) so the barrier
		 * semantics hold even without a separate epoch object. */
		conn_wait_active_ee_empty(tconn);
		drbd_flush(tconn);

		/* If writes arrived meanwhile, retry the allocation once. */
		if (atomic_read(&tconn->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
		return -EIO;
	}

	/* We got a fresh epoch object; initialize and install it. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&tconn->epoch_lock);
	if (atomic_read(&tconn->current_epoch->epoch_size)) {
		list_add(&epoch->list, &tconn->current_epoch->list);
		tconn->current_epoch = epoch;
		tconn->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&tconn->epoch_lock);

	return 0;
}
1471b411b363SPhilipp Reisner 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receives @data_size bytes (optionally preceded by an integrity digest)
 * from the peer into a freshly allocated peer request for @sector.
 * Returns the peer request, or NULL on receive error, invalid size,
 * out-of-range sector, allocation failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* If data integrity checking is configured, the payload is preceded
	 * by a digest of dgs bytes; receive it first. */
	dgs = 0;
	if (mdev->tconn->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Zero-sized requests (e.g. pure flushes) carry no payload. */
	if (!data_size)
		return peer_req;

	/* Receive the payload page by page along the page chain. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(mdev, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* Verify the received data against the digest sent by the peer. */
	if (dgs) {
		drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1554b411b363SPhilipp Reisner 
1555b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1556b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1557b411b363SPhilipp Reisner  */
1558b411b363SPhilipp Reisner static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1559b411b363SPhilipp Reisner {
1560b411b363SPhilipp Reisner 	struct page *page;
1561a5c31904SAndreas Gruenbacher 	int err = 0;
1562b411b363SPhilipp Reisner 	void *data;
1563b411b363SPhilipp Reisner 
1564c3470cdeSLars Ellenberg 	if (!data_size)
1565fc5be839SAndreas Gruenbacher 		return 0;
1566c3470cdeSLars Ellenberg 
1567c37c8ecfSAndreas Gruenbacher 	page = drbd_alloc_pages(mdev, 1, 1);
1568b411b363SPhilipp Reisner 
1569b411b363SPhilipp Reisner 	data = kmap(page);
1570b411b363SPhilipp Reisner 	while (data_size) {
1571fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1572fc5be839SAndreas Gruenbacher 
1573a5c31904SAndreas Gruenbacher 		err = drbd_recv_all_warn(mdev->tconn, data, len);
1574a5c31904SAndreas Gruenbacher 		if (err)
1575b411b363SPhilipp Reisner 			break;
1576a5c31904SAndreas Gruenbacher 		data_size -= len;
1577b411b363SPhilipp Reisner 	}
1578b411b363SPhilipp Reisner 	kunmap(page);
15795cc287e0SAndreas Gruenbacher 	drbd_free_pages(mdev, page, 0);
1580fc5be839SAndreas Gruenbacher 	return err;
1581b411b363SPhilipp Reisner }
1582b411b363SPhilipp Reisner 
/* Receive the payload of a disk-less read reply directly into the pages
 * of the original request's master bio, verifying the optional integrity
 * digest. Returns 0 on success or a negative error code. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, err, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* If data integrity checking is configured, the payload is
	 * preceded by a digest of dgs bytes. */
	dgs = 0;
	if (mdev->tconn->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
		err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* Fill each bio segment in turn straight from the socket. */
	bio_for_each_segment(bvec, bio, i) {
		void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
		expect = min_t(int, data_size, bvec->bv_len);
		err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
		kunmap(bvec->bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	/* Verify the received data against the peer's digest. */
	if (dgs) {
		drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(data_size == 0);
	return 0;
}
1629b411b363SPhilipp Reisner 
1630a990be46SAndreas Gruenbacher /*
1631a990be46SAndreas Gruenbacher  * e_end_resync_block() is called in asender context via
1632a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
1633a990be46SAndreas Gruenbacher  */
163499920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
1635b411b363SPhilipp Reisner {
16368050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
16378050e6d0SAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
163800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1639db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
164099920dc5SAndreas Gruenbacher 	int err;
1641b411b363SPhilipp Reisner 
1642db830c46SAndreas Gruenbacher 	D_ASSERT(drbd_interval_empty(&peer_req->i));
1643b411b363SPhilipp Reisner 
1644db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1645db830c46SAndreas Gruenbacher 		drbd_set_in_sync(mdev, sector, peer_req->i.size);
164699920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
1647b411b363SPhilipp Reisner 	} else {
1648b411b363SPhilipp Reisner 		/* Record failure to sync */
1649db830c46SAndreas Gruenbacher 		drbd_rs_failed_io(mdev, sector, peer_req->i.size);
1650b411b363SPhilipp Reisner 
165199920dc5SAndreas Gruenbacher 		err  = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
1652b411b363SPhilipp Reisner 	}
1653b411b363SPhilipp Reisner 	dec_unacked(mdev);
1654b411b363SPhilipp Reisner 
165599920dc5SAndreas Gruenbacher 	return err;
1656b411b363SPhilipp Reisner }
1657b411b363SPhilipp Reisner 
/* Receive resync payload for @sector and submit it as a local write.
 * Consumes the ldev reference held by the caller: it is released here on
 * failure, or later in the endio path on success. Returns 0 or -EIO. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* Queue on sync_ee before submitting, so completion can find it. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* Undo the queuing above before freeing the request. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_peer_req(mdev, peer_req);
fail:
	put_ldev(mdev);
	return -EIO;
}
1693b411b363SPhilipp Reisner 
1694668eebc6SAndreas Gruenbacher static struct drbd_request *
1695bc9c5c41SAndreas Gruenbacher find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1696bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
1697b411b363SPhilipp Reisner {
1698b411b363SPhilipp Reisner 	struct drbd_request *req;
1699668eebc6SAndreas Gruenbacher 
1700bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
1701bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
17025e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
1703668eebc6SAndreas Gruenbacher 		return req;
1704c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
17055af172edSAndreas Gruenbacher 		dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
1706c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
1707c3afd8f5SAndreas Gruenbacher 	}
1708668eebc6SAndreas Gruenbacher 	return NULL;
1709668eebc6SAndreas Gruenbacher }
1710668eebc6SAndreas Gruenbacher 
/* Handle P_DATA_REPLY: the answer to one of our read requests.
 * Looks up the originating request and fills its bio with the payload.
 * Returns 0 on success or a negative error code. */
static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	sector = be64_to_cpu(p->sector);

	/* Validate the peer-supplied block id under the request lock;
	 * missing_ok=false: a reply for an unknown read is a protocol error. */
	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(mdev, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1743b411b363SPhilipp Reisner 
/* Handle P_RS_DATA_REPLY: resync data from the peer.
 * With a local disk the payload is written out via recv_resync_read();
 * without one it is drained from the socket and negatively acked.
 * Returns 0 on success or a negative error code. */
static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(p->block_id == ID_SYNCER);

	if (get_ldev(mdev)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(mdev, sector, pi->size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not write resync data to local disk.\n");

		/* Keep the receive stream in sync even though we drop the data. */
		err = drbd_drain_block(mdev, pi->size);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
	}

	/* Account the received sectors for resync rate control. */
	atomic_add(pi->size >> 9, &mdev->rs_sect_in);

	return err;
}
1776b411b363SPhilipp Reisner 
17777be8da07SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_conf *mdev,
17787be8da07SAndreas Gruenbacher 				       sector_t sector, int size)
1779b411b363SPhilipp Reisner {
17807be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
17817be8da07SAndreas Gruenbacher 	struct drbd_request *req;
1782b411b363SPhilipp Reisner 
17837be8da07SAndreas Gruenbacher 	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
17847be8da07SAndreas Gruenbacher 		if (!i->local)
17857be8da07SAndreas Gruenbacher 			continue;
17867be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
17877be8da07SAndreas Gruenbacher 		if (req->rq_state & RQ_LOCAL_PENDING ||
17887be8da07SAndreas Gruenbacher 		    !(req->rq_state & RQ_POSTPONED))
17897be8da07SAndreas Gruenbacher 			continue;
17902312f0b3SLars Ellenberg 		/* as it is RQ_POSTPONED, this will cause it to
17912312f0b3SLars Ellenberg 		 * be queued on the retry workqueue. */
1792d4dabbe2SLars Ellenberg 		__req_mod(req, CONFLICT_RESOLVED, NULL);
17937be8da07SAndreas Gruenbacher 	}
17947be8da07SAndreas Gruenbacher }
17957be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a mirrored write (peer request): sends the
 * appropriate (negative) ack if one is owed, removes the request from the
 * write-conflict interval tree, and drops its reference on the epoch.
 * Returns the error from sending the ack, or 0.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync, a write that may set bits in sync is
			 * acked with P_RS_WRITE_ACK and also marked in sync
			 * locally; otherwise a plain P_WRITE_ACK is sent. */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		/* matches the inc_unacked() done when the write was received */
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1839b411b363SPhilipp Reisner 
18407be8da07SAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
1841b411b363SPhilipp Reisner {
18427be8da07SAndreas Gruenbacher 	struct drbd_conf *mdev = w->mdev;
18438050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
18448050e6d0SAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
184599920dc5SAndreas Gruenbacher 	int err;
1846b411b363SPhilipp Reisner 
184799920dc5SAndreas Gruenbacher 	err = drbd_send_ack(mdev, ack, peer_req);
1848b411b363SPhilipp Reisner 	dec_unacked(mdev);
1849b411b363SPhilipp Reisner 
185099920dc5SAndreas Gruenbacher 	return err;
1851b411b363SPhilipp Reisner }
1852b411b363SPhilipp Reisner 
/* Ack a conflicting peer write as superseded (discarded during write
 * conflict resolution); see handle_write_conflicts(). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
1857b6a370baSPhilipp Reisner 
185899920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
18597be8da07SAndreas Gruenbacher {
18607be8da07SAndreas Gruenbacher 	struct drbd_tconn *tconn = w->mdev->tconn;
18617be8da07SAndreas Gruenbacher 
18627be8da07SAndreas Gruenbacher 	return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1863d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
18647be8da07SAndreas Gruenbacher }
18657be8da07SAndreas Gruenbacher 
/* Is sequence number a newer than b, under 32-bit wrap-around? */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * Compare via the signed difference, so a value just past the
	 * 32-bit wrap still counts as greater.
	 * (For 24-bit wrap-around, both values would first have to be
	 * shifted left by 8.)
	 */
	s32 diff = (s32)a - (s32)b;

	return diff > 0;
}
18753e394da1SAndreas Gruenbacher 
/* Newest of two sequence numbers, honoring 32-bit wrap-around. */
static u32 seq_max(u32 a, u32 b)
{
	if (seq_greater(a, b))
		return a;
	return b;
}
18803e394da1SAndreas Gruenbacher 
18817be8da07SAndreas Gruenbacher static bool need_peer_seq(struct drbd_conf *mdev)
18827be8da07SAndreas Gruenbacher {
18837be8da07SAndreas Gruenbacher 	struct drbd_tconn *tconn = mdev->tconn;
1884302bdeaeSPhilipp Reisner 	int tp;
18857be8da07SAndreas Gruenbacher 
18867be8da07SAndreas Gruenbacher 	/*
18877be8da07SAndreas Gruenbacher 	 * We only need to keep track of the last packet_seq number of our peer
1888427c0434SLars Ellenberg 	 * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
18897be8da07SAndreas Gruenbacher 	 * handle_write_conflicts().
18907be8da07SAndreas Gruenbacher 	 */
1891302bdeaeSPhilipp Reisner 
1892302bdeaeSPhilipp Reisner 	rcu_read_lock();
1893302bdeaeSPhilipp Reisner 	tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1894302bdeaeSPhilipp Reisner 	rcu_read_unlock();
1895302bdeaeSPhilipp Reisner 
1896427c0434SLars Ellenberg 	return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
18977be8da07SAndreas Gruenbacher }
18987be8da07SAndreas Gruenbacher 
189943ae077dSAndreas Gruenbacher static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
19003e394da1SAndreas Gruenbacher {
19013c13b680SLars Ellenberg 	unsigned int newest_peer_seq;
19023e394da1SAndreas Gruenbacher 
19037be8da07SAndreas Gruenbacher 	if (need_peer_seq(mdev)) {
19043e394da1SAndreas Gruenbacher 		spin_lock(&mdev->peer_seq_lock);
19053c13b680SLars Ellenberg 		newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
19063c13b680SLars Ellenberg 		mdev->peer_seq = newest_peer_seq;
19073e394da1SAndreas Gruenbacher 		spin_unlock(&mdev->peer_seq_lock);
19083c13b680SLars Ellenberg 		/* wake up only if we actually changed mdev->peer_seq */
19093c13b680SLars Ellenberg 		if (peer_seq == newest_peer_seq)
19103e394da1SAndreas Gruenbacher 			wake_up(&mdev->seq_wait);
19113e394da1SAndreas Gruenbacher 	}
19127be8da07SAndreas Gruenbacher }
19133e394da1SAndreas Gruenbacher 
/* Do the sector ranges [s1, s1 + l1>>9) and [s2, s2 + l2>>9) intersect?
 * l1 and l2 are lengths in bytes. */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	sector_t e1 = s1 + (l1 >> 9);
	sector_t e2 = s2 + (l2 >> 9);

	/* each range must start before the other one ends */
	return s1 < e2 && s2 < e1;
}
1918d93f6302SLars Ellenberg 
1919d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
19203ea35df8SPhilipp Reisner static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
1921d93f6302SLars Ellenberg {
1922d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
1923b6a370baSPhilipp Reisner 	bool rv = 0;
1924b6a370baSPhilipp Reisner 
1925d93f6302SLars Ellenberg 	spin_lock_irq(&mdev->tconn->req_lock);
1926d93f6302SLars Ellenberg 	list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1927d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
1928d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
1929b6a370baSPhilipp Reisner 			rv = 1;
1930b6a370baSPhilipp Reisner 			break;
1931b6a370baSPhilipp Reisner 		}
1932b6a370baSPhilipp Reisner 	}
1933d93f6302SLars Ellenberg 	spin_unlock_irq(&mdev->tconn->req_lock);
1934b6a370baSPhilipp Reisner 
1935b6a370baSPhilipp Reisner 	return rv;
1936b6a370baSPhilipp Reisner }
1937b6a370baSPhilipp Reisner 
1938b411b363SPhilipp Reisner /* Called from receive_Data.
1939b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
1940b411b363SPhilipp Reisner  *
1941b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1942b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
1943b411b363SPhilipp Reisner  * been sent.
1944b411b363SPhilipp Reisner  *
1945b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
1946b411b363SPhilipp Reisner  *
1947b411b363SPhilipp Reisner  * In case packet_seq is larger than mdev->peer_seq number, there are
1948b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
1949b411b363SPhilipp Reisner  * In case we are the logically next packet, we update mdev->peer_seq
1950b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
1951b411b363SPhilipp Reisner  *
1952b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1953b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1954b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1955b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1956b411b363SPhilipp Reisner  *
1957b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
1958b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	/* sequence number tracking is only needed in dual-primary mode
	 * with the resolve-conflicts flag; see need_peer_seq() */
	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* if peer_seq - 1 <= mdev->peer_seq, we are the logically
		 * next packet: record the new maximum and proceed */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* older packets are still outstanding on the other socket;
		 * sleep until update_peer_seq() advances mdev->peer_seq,
		 * dropping the spinlock across the schedule */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1996b411b363SPhilipp Reisner 
1997688593c5SLars Ellenberg /* see also bio_flags_to_wire()
1998688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
1999688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2000688593c5SLars Ellenberg static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
200176d2e7ecSPhilipp Reisner {
200276d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
200376d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
2004688593c5SLars Ellenberg 		(dpf & DP_FLUSH ? REQ_FLUSH : 0) |
200576d2e7ecSPhilipp Reisner 		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
200676d2e7ecSPhilipp Reisner }
200776d2e7ecSPhilipp Reisner 
/*
 * Fail (NEG_ACK) every postponed local write overlapping
 * [sector, sector + size).  Called with the req_lock held; the lock is
 * dropped and re-acquired around completing the master bio, so the overlap
 * scan is restarted from scratch after each completed request.
 */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* complete_master_bio() must not run under the req_lock */
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
20327be8da07SAndreas Gruenbacher 
/*
 * Resolve conflicts between an incoming peer write and overlapping local
 * (or, unexpectedly, remote) requests.  Called with the req_lock held.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it was
 * discarded as superseded (the ack is then sent via the done_ee list),
 * or another negative error if waiting for a conflicting request failed.
 * On error the peer request's interval is removed from the tree again.
 */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* the ack is sent later from the done_ee list;
			 * balanced by dec_unacked() in e_send_ack() */
			inc_unacked(mdev);
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
21417be8da07SAndreas Gruenbacher 
/* mirrored write */
/*
 * Handle a P_DATA packet: a write mirrored from the peer.  Reads the
 * payload, attaches it to the current write epoch, resolves conflicts in
 * dual-primary mode, sends the protocol-appropriate ack, and submits the
 * write to the local disk.  Returns 0 on success, a negative error to
 * trigger a reconnect.
 */
static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err, tp;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	if (!get_ldev(mdev)) {
		int err2;

		/* No local disk: still keep peer_seq and epoch bookkeeping
		 * consistent, but drain the payload and NAK the write. */
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
		atomic_inc(&tconn->current_epoch->epoch_size);
		err2 = drbd_drain_block(mdev, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
	if (!peer_req) {
		put_ldev(mdev);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);
	/* a request without pages is only expected to be an empty flush */
	if (peer_req->pages == NULL) {
		D_ASSERT(peer_req->i.size == 0);
		D_ASSERT(dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the current write epoch */
	spin_lock(&tconn->epoch_lock);
	peer_req->epoch = tconn->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&tconn->epoch_lock);

	rcu_read_lock();
	tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
	rcu_read_unlock();
	if (tp) {
		/* dual-primary: order against the peer's sequence numbers
		 * and resolve conflicts with overlapping local writes */
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&mdev->tconn->req_lock);
		err = handle_write_conflicts(mdev, peer_req);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			if (err == -ENOENT) {
				/* write was superseded; its ack is sent
				 * from the done_ee list, nothing to do */
				put_ldev(mdev);
				return 0;
			}
			goto out_interrupted;
		}
	} else
		spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* as sync target, do not let this write overtake in-flight resync
	 * writes to the same area */
	if (mdev->state.conn == C_SYNC_TARGET)
		wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));

	/* before protocol 100, the ack policy is implied by the protocol */
	if (mdev->tconn->agreed_pro_version < 100) {
		rcu_read_lock();
		switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
		rcu_read_unlock();
	}

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, &peer_req->i);
	}

	err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, &peer_req->i);

out_interrupted:
	drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_peer_req(mdev, peer_req);
	return err;
}
2280b411b363SPhilipp Reisner 
22810f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
22820f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
22830f0601f4SLars Ellenberg  *
22840f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
22850f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
22860f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
22870f0601f4SLars Ellenberg  * activity, it obviously is "busy".
22880f0601f4SLars Ellenberg  *
22890f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
22900f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
22910f0601f4SLars Ellenberg  */
2292e3555d85SPhilipp Reisner int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
22930f0601f4SLars Ellenberg {
22940f0601f4SLars Ellenberg 	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
22950f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2296e3555d85SPhilipp Reisner 	struct lc_element *tmp;
22970f0601f4SLars Ellenberg 	int curr_events;
22980f0601f4SLars Ellenberg 	int throttle = 0;
2299daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2300daeda1ccSPhilipp Reisner 
2301daeda1ccSPhilipp Reisner 	rcu_read_lock();
2302daeda1ccSPhilipp Reisner 	c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2303daeda1ccSPhilipp Reisner 	rcu_read_unlock();
23040f0601f4SLars Ellenberg 
23050f0601f4SLars Ellenberg 	/* feature disabled? */
2306daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
23070f0601f4SLars Ellenberg 		return 0;
23080f0601f4SLars Ellenberg 
2309e3555d85SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
2310e3555d85SPhilipp Reisner 	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2311e3555d85SPhilipp Reisner 	if (tmp) {
2312e3555d85SPhilipp Reisner 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2313e3555d85SPhilipp Reisner 		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2314e3555d85SPhilipp Reisner 			spin_unlock_irq(&mdev->al_lock);
2315e3555d85SPhilipp Reisner 			return 0;
2316e3555d85SPhilipp Reisner 		}
2317e3555d85SPhilipp Reisner 		/* Do not slow down if app IO is already waiting for this extent */
2318e3555d85SPhilipp Reisner 	}
2319e3555d85SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
2320e3555d85SPhilipp Reisner 
23210f0601f4SLars Ellenberg 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
23220f0601f4SLars Ellenberg 		      (int)part_stat_read(&disk->part0, sectors[1]) -
23230f0601f4SLars Ellenberg 			atomic_read(&mdev->rs_sect_ev);
2324e3555d85SPhilipp Reisner 
23250f0601f4SLars Ellenberg 	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
23260f0601f4SLars Ellenberg 		unsigned long rs_left;
23270f0601f4SLars Ellenberg 		int i;
23280f0601f4SLars Ellenberg 
23290f0601f4SLars Ellenberg 		mdev->rs_last_events = curr_events;
23300f0601f4SLars Ellenberg 
23310f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
23320f0601f4SLars Ellenberg 		 * approx. */
23332649f080SLars Ellenberg 		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
23342649f080SLars Ellenberg 
23352649f080SLars Ellenberg 		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
23362649f080SLars Ellenberg 			rs_left = mdev->ov_left;
23372649f080SLars Ellenberg 		else
23380f0601f4SLars Ellenberg 			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
23390f0601f4SLars Ellenberg 
23400f0601f4SLars Ellenberg 		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
23410f0601f4SLars Ellenberg 		if (!dt)
23420f0601f4SLars Ellenberg 			dt++;
23430f0601f4SLars Ellenberg 		db = mdev->rs_mark_left[i] - rs_left;
23440f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
23450f0601f4SLars Ellenberg 
2346daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
23470f0601f4SLars Ellenberg 			throttle = 1;
23480f0601f4SLars Ellenberg 	}
23490f0601f4SLars Ellenberg 	return throttle;
23500f0601f4SLars Ellenberg }
23510f0601f4SLars Ellenberg 
23520f0601f4SLars Ellenberg 
23534a76b161SAndreas Gruenbacher static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
2354b411b363SPhilipp Reisner {
23554a76b161SAndreas Gruenbacher 	struct drbd_conf *mdev;
2356b411b363SPhilipp Reisner 	sector_t sector;
23574a76b161SAndreas Gruenbacher 	sector_t capacity;
2358db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2359b411b363SPhilipp Reisner 	struct digest_info *di = NULL;
2360b18b37beSPhilipp Reisner 	int size, verb;
2361b411b363SPhilipp Reisner 	unsigned int fault_type;
2362e658983aSAndreas Gruenbacher 	struct p_block_req *p =	pi->data;
23634a76b161SAndreas Gruenbacher 
23644a76b161SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
23654a76b161SAndreas Gruenbacher 	if (!mdev)
23664a76b161SAndreas Gruenbacher 		return -EIO;
23674a76b161SAndreas Gruenbacher 	capacity = drbd_get_capacity(mdev->this_bdev);
2368b411b363SPhilipp Reisner 
2369b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2370b411b363SPhilipp Reisner 	size   = be32_to_cpu(p->blksize);
2371b411b363SPhilipp Reisner 
2372c670a398SAndreas Gruenbacher 	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2373b411b363SPhilipp Reisner 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2374b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
237582bc0194SAndreas Gruenbacher 		return -EINVAL;
2376b411b363SPhilipp Reisner 	}
2377b411b363SPhilipp Reisner 	if (sector + (size>>9) > capacity) {
2378b411b363SPhilipp Reisner 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2379b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
238082bc0194SAndreas Gruenbacher 		return -EINVAL;
2381b411b363SPhilipp Reisner 	}
2382b411b363SPhilipp Reisner 
2383b411b363SPhilipp Reisner 	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
2384b18b37beSPhilipp Reisner 		verb = 1;
2385e2857216SAndreas Gruenbacher 		switch (pi->cmd) {
2386b18b37beSPhilipp Reisner 		case P_DATA_REQUEST:
2387b18b37beSPhilipp Reisner 			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2388b18b37beSPhilipp Reisner 			break;
2389b18b37beSPhilipp Reisner 		case P_RS_DATA_REQUEST:
2390b18b37beSPhilipp Reisner 		case P_CSUM_RS_REQUEST:
2391b18b37beSPhilipp Reisner 		case P_OV_REQUEST:
2392b18b37beSPhilipp Reisner 			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2393b18b37beSPhilipp Reisner 			break;
2394b18b37beSPhilipp Reisner 		case P_OV_REPLY:
2395b18b37beSPhilipp Reisner 			verb = 0;
2396b18b37beSPhilipp Reisner 			dec_rs_pending(mdev);
2397b18b37beSPhilipp Reisner 			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2398b18b37beSPhilipp Reisner 			break;
2399b18b37beSPhilipp Reisner 		default:
240049ba9b1bSAndreas Gruenbacher 			BUG();
2401b18b37beSPhilipp Reisner 		}
2402b18b37beSPhilipp Reisner 		if (verb && __ratelimit(&drbd_ratelimit_state))
2403b411b363SPhilipp Reisner 			dev_err(DEV, "Can not satisfy peer's read request, "
2404b411b363SPhilipp Reisner 			    "no local data.\n");
2405b18b37beSPhilipp Reisner 
2406a821cc4aSLars Ellenberg 		/* drain possibly payload */
2407e2857216SAndreas Gruenbacher 		return drbd_drain_block(mdev, pi->size);
2408b411b363SPhilipp Reisner 	}
2409b411b363SPhilipp Reisner 
2410b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2411b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
2412b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
24130db55363SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
2414db830c46SAndreas Gruenbacher 	if (!peer_req) {
2415b411b363SPhilipp Reisner 		put_ldev(mdev);
241682bc0194SAndreas Gruenbacher 		return -ENOMEM;
2417b411b363SPhilipp Reisner 	}
2418b411b363SPhilipp Reisner 
2419e2857216SAndreas Gruenbacher 	switch (pi->cmd) {
2420b411b363SPhilipp Reisner 	case P_DATA_REQUEST:
2421db830c46SAndreas Gruenbacher 		peer_req->w.cb = w_e_end_data_req;
2422b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_DT_RD;
242380a40e43SLars Ellenberg 		/* application IO, don't drbd_rs_begin_io */
242480a40e43SLars Ellenberg 		goto submit;
242580a40e43SLars Ellenberg 
2426b411b363SPhilipp Reisner 	case P_RS_DATA_REQUEST:
2427db830c46SAndreas Gruenbacher 		peer_req->w.cb = w_e_end_rsdata_req;
2428b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
24295f9915bbSLars Ellenberg 		/* used in the sector offset progress display */
24305f9915bbSLars Ellenberg 		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
2431b411b363SPhilipp Reisner 		break;
2432b411b363SPhilipp Reisner 
2433b411b363SPhilipp Reisner 	case P_OV_REPLY:
2434b411b363SPhilipp Reisner 	case P_CSUM_RS_REQUEST:
2435b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2436e2857216SAndreas Gruenbacher 		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2437b411b363SPhilipp Reisner 		if (!di)
2438b411b363SPhilipp Reisner 			goto out_free_e;
2439b411b363SPhilipp Reisner 
2440e2857216SAndreas Gruenbacher 		di->digest_size = pi->size;
2441b411b363SPhilipp Reisner 		di->digest = (((char *)di)+sizeof(struct digest_info));
2442b411b363SPhilipp Reisner 
2443db830c46SAndreas Gruenbacher 		peer_req->digest = di;
2444db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_HAS_DIGEST;
2445c36c3cedSLars Ellenberg 
2446e2857216SAndreas Gruenbacher 		if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
2447b411b363SPhilipp Reisner 			goto out_free_e;
2448b411b363SPhilipp Reisner 
2449e2857216SAndreas Gruenbacher 		if (pi->cmd == P_CSUM_RS_REQUEST) {
245031890f4aSPhilipp Reisner 			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
2451db830c46SAndreas Gruenbacher 			peer_req->w.cb = w_e_end_csum_rs_req;
24525f9915bbSLars Ellenberg 			/* used in the sector offset progress display */
24535f9915bbSLars Ellenberg 			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
2454e2857216SAndreas Gruenbacher 		} else if (pi->cmd == P_OV_REPLY) {
24552649f080SLars Ellenberg 			/* track progress, we may need to throttle */
24562649f080SLars Ellenberg 			atomic_add(size >> 9, &mdev->rs_sect_in);
2457db830c46SAndreas Gruenbacher 			peer_req->w.cb = w_e_end_ov_reply;
2458b411b363SPhilipp Reisner 			dec_rs_pending(mdev);
24590f0601f4SLars Ellenberg 			/* drbd_rs_begin_io done when we sent this request,
24600f0601f4SLars Ellenberg 			 * but accounting still needs to be done. */
24610f0601f4SLars Ellenberg 			goto submit_for_resync;
2462b411b363SPhilipp Reisner 		}
2463b411b363SPhilipp Reisner 		break;
2464b411b363SPhilipp Reisner 
2465b411b363SPhilipp Reisner 	case P_OV_REQUEST:
2466b411b363SPhilipp Reisner 		if (mdev->ov_start_sector == ~(sector_t)0 &&
246731890f4aSPhilipp Reisner 		    mdev->tconn->agreed_pro_version >= 90) {
2468de228bbaSLars Ellenberg 			unsigned long now = jiffies;
2469de228bbaSLars Ellenberg 			int i;
2470b411b363SPhilipp Reisner 			mdev->ov_start_sector = sector;
2471b411b363SPhilipp Reisner 			mdev->ov_position = sector;
247230b743a2SLars Ellenberg 			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
247330b743a2SLars Ellenberg 			mdev->rs_total = mdev->ov_left;
2474de228bbaSLars Ellenberg 			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2475de228bbaSLars Ellenberg 				mdev->rs_mark_left[i] = mdev->ov_left;
2476de228bbaSLars Ellenberg 				mdev->rs_mark_time[i] = now;
2477de228bbaSLars Ellenberg 			}
2478b411b363SPhilipp Reisner 			dev_info(DEV, "Online Verify start sector: %llu\n",
2479b411b363SPhilipp Reisner 					(unsigned long long)sector);
2480b411b363SPhilipp Reisner 		}
2481db830c46SAndreas Gruenbacher 		peer_req->w.cb = w_e_end_ov_req;
2482b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2483b411b363SPhilipp Reisner 		break;
2484b411b363SPhilipp Reisner 
2485b411b363SPhilipp Reisner 	default:
248649ba9b1bSAndreas Gruenbacher 		BUG();
2487b411b363SPhilipp Reisner 	}
2488b411b363SPhilipp Reisner 
24890f0601f4SLars Ellenberg 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
24900f0601f4SLars Ellenberg 	 * wrt the receiver, but it is not as straightforward as it may seem.
24910f0601f4SLars Ellenberg 	 * Various places in the resync start and stop logic assume resync
24920f0601f4SLars Ellenberg 	 * requests are processed in order, requeuing this on the worker thread
24930f0601f4SLars Ellenberg 	 * introduces a bunch of new code for synchronization between threads.
24940f0601f4SLars Ellenberg 	 *
24950f0601f4SLars Ellenberg 	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
24960f0601f4SLars Ellenberg 	 * "forever", throttling after drbd_rs_begin_io will lock that extent
24970f0601f4SLars Ellenberg 	 * for application writes for the same time.  For now, just throttle
24980f0601f4SLars Ellenberg 	 * here, where the rest of the code expects the receiver to sleep for
24990f0601f4SLars Ellenberg 	 * a while, anyways.
25000f0601f4SLars Ellenberg 	 */
2501b411b363SPhilipp Reisner 
25020f0601f4SLars Ellenberg 	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
25030f0601f4SLars Ellenberg 	 * this defers syncer requests for some time, before letting at least
25040f0601f4SLars Ellenberg 	 * on request through.  The resync controller on the receiving side
25050f0601f4SLars Ellenberg 	 * will adapt to the incoming rate accordingly.
25060f0601f4SLars Ellenberg 	 *
25070f0601f4SLars Ellenberg 	 * We cannot throttle here if remote is Primary/SyncTarget:
25080f0601f4SLars Ellenberg 	 * we would also throttle its application reads.
25090f0601f4SLars Ellenberg 	 * In that case, throttling is done on the SyncTarget only.
25100f0601f4SLars Ellenberg 	 */
2511e3555d85SPhilipp Reisner 	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2512e3555d85SPhilipp Reisner 		schedule_timeout_uninterruptible(HZ/10);
2513e3555d85SPhilipp Reisner 	if (drbd_rs_begin_io(mdev, sector))
251480a40e43SLars Ellenberg 		goto out_free_e;
2515b411b363SPhilipp Reisner 
25160f0601f4SLars Ellenberg submit_for_resync:
25170f0601f4SLars Ellenberg 	atomic_add(size >> 9, &mdev->rs_sect_ev);
25180f0601f4SLars Ellenberg 
251980a40e43SLars Ellenberg submit:
2520b411b363SPhilipp Reisner 	inc_unacked(mdev);
252187eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
2522db830c46SAndreas Gruenbacher 	list_add_tail(&peer_req->w.list, &mdev->read_ee);
252387eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
2524b411b363SPhilipp Reisner 
2525fbe29decSAndreas Gruenbacher 	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
252682bc0194SAndreas Gruenbacher 		return 0;
2527b411b363SPhilipp Reisner 
252810f6d992SLars Ellenberg 	/* don't care for the reason here */
252910f6d992SLars Ellenberg 	dev_err(DEV, "submit failed, triggering re-connect\n");
253087eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
2531db830c46SAndreas Gruenbacher 	list_del(&peer_req->w.list);
253287eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
253322cc37a9SLars Ellenberg 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
253422cc37a9SLars Ellenberg 
2535b411b363SPhilipp Reisner out_free_e:
2536b411b363SPhilipp Reisner 	put_ldev(mdev);
25373967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
253882bc0194SAndreas Gruenbacher 	return -EIO;
2539b411b363SPhilipp Reisner }
2540b411b363SPhilipp Reisner 
2541b411b363SPhilipp Reisner static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2542b411b363SPhilipp Reisner {
2543b411b363SPhilipp Reisner 	int self, peer, rv = -100;
2544b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
254544ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
2546b411b363SPhilipp Reisner 
2547b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2548b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_BITMAP] & 1;
2549b411b363SPhilipp Reisner 
2550b411b363SPhilipp Reisner 	ch_peer = mdev->p_uuid[UI_SIZE];
2551b411b363SPhilipp Reisner 	ch_self = mdev->comm_bm_set;
2552b411b363SPhilipp Reisner 
255344ed167dSPhilipp Reisner 	rcu_read_lock();
255444ed167dSPhilipp Reisner 	after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
255544ed167dSPhilipp Reisner 	rcu_read_unlock();
255644ed167dSPhilipp Reisner 	switch (after_sb_0p) {
2557b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2558b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2559b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
256044ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
2561b411b363SPhilipp Reisner 		dev_err(DEV, "Configuration error.\n");
2562b411b363SPhilipp Reisner 		break;
2563b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2564b411b363SPhilipp Reisner 		break;
2565b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2566b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2567b411b363SPhilipp Reisner 			rv = -1;
2568b411b363SPhilipp Reisner 			break;
2569b411b363SPhilipp Reisner 		}
2570b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2571b411b363SPhilipp Reisner 			rv =  1;
2572b411b363SPhilipp Reisner 			break;
2573b411b363SPhilipp Reisner 		}
2574b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2575b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2576b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2577b411b363SPhilipp Reisner 			rv = 1;
2578b411b363SPhilipp Reisner 			break;
2579b411b363SPhilipp Reisner 		}
2580b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2581b411b363SPhilipp Reisner 			rv = -1;
2582b411b363SPhilipp Reisner 			break;
2583b411b363SPhilipp Reisner 		}
2584b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2585ad19bf6eSLars Ellenberg 		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
2586b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
2587b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2588b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
2589427c0434SLars Ellenberg 			rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
2590b411b363SPhilipp Reisner 				? -1 : 1;
2591b411b363SPhilipp Reisner 			break;
2592b411b363SPhilipp Reisner 		} else {
2593b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
2594b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
2595b411b363SPhilipp Reisner 		}
259644ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
2597b411b363SPhilipp Reisner 			break;
2598b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2599b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
2600b411b363SPhilipp Reisner 			rv = -1;
2601b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
2602b411b363SPhilipp Reisner 			rv =  1;
2603b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
2604b411b363SPhilipp Reisner 		     /* Well, then use something else. */
2605427c0434SLars Ellenberg 			rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
2606b411b363SPhilipp Reisner 				? -1 : 1;
2607b411b363SPhilipp Reisner 		break;
2608b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2609b411b363SPhilipp Reisner 		rv = -1;
2610b411b363SPhilipp Reisner 		break;
2611b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2612b411b363SPhilipp Reisner 		rv =  1;
2613b411b363SPhilipp Reisner 	}
2614b411b363SPhilipp Reisner 
2615b411b363SPhilipp Reisner 	return rv;
2616b411b363SPhilipp Reisner }
2617b411b363SPhilipp Reisner 
2618b411b363SPhilipp Reisner static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2619b411b363SPhilipp Reisner {
26206184ea21SAndreas Gruenbacher 	int hg, rv = -100;
262144ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
2622b411b363SPhilipp Reisner 
262344ed167dSPhilipp Reisner 	rcu_read_lock();
262444ed167dSPhilipp Reisner 	after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
262544ed167dSPhilipp Reisner 	rcu_read_unlock();
262644ed167dSPhilipp Reisner 	switch (after_sb_1p) {
2627b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2628b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2629b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2630b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2631b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
263244ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2633b411b363SPhilipp Reisner 		dev_err(DEV, "Configuration error.\n");
2634b411b363SPhilipp Reisner 		break;
2635b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2636b411b363SPhilipp Reisner 		break;
2637b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2638b411b363SPhilipp Reisner 		hg = drbd_asb_recover_0p(mdev);
2639b411b363SPhilipp Reisner 		if (hg == -1 && mdev->state.role == R_SECONDARY)
2640b411b363SPhilipp Reisner 			rv = hg;
2641b411b363SPhilipp Reisner 		if (hg == 1  && mdev->state.role == R_PRIMARY)
2642b411b363SPhilipp Reisner 			rv = hg;
2643b411b363SPhilipp Reisner 		break;
2644b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
2645b411b363SPhilipp Reisner 		rv = drbd_asb_recover_0p(mdev);
2646b411b363SPhilipp Reisner 		break;
2647b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2648b411b363SPhilipp Reisner 		return mdev->state.role == R_PRIMARY ? 1 : -1;
2649b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
2650b411b363SPhilipp Reisner 		hg = drbd_asb_recover_0p(mdev);
2651b411b363SPhilipp Reisner 		if (hg == -1 && mdev->state.role == R_PRIMARY) {
2652bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2653bb437946SAndreas Gruenbacher 
2654bb437946SAndreas Gruenbacher 			drbd_set_role(mdev, R_SECONDARY, 0);
2655b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2656b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2657b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2658bb437946SAndreas Gruenbacher 			rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2659bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2660b411b363SPhilipp Reisner 				drbd_khelper(mdev, "pri-lost-after-sb");
2661b411b363SPhilipp Reisner 			} else {
2662b411b363SPhilipp Reisner 				dev_warn(DEV, "Successfully gave up primary role.\n");
2663b411b363SPhilipp Reisner 				rv = hg;
2664b411b363SPhilipp Reisner 			}
2665b411b363SPhilipp Reisner 		} else
2666b411b363SPhilipp Reisner 			rv = hg;
2667b411b363SPhilipp Reisner 	}
2668b411b363SPhilipp Reisner 
2669b411b363SPhilipp Reisner 	return rv;
2670b411b363SPhilipp Reisner }
2671b411b363SPhilipp Reisner 
2672b411b363SPhilipp Reisner static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2673b411b363SPhilipp Reisner {
26746184ea21SAndreas Gruenbacher 	int hg, rv = -100;
267544ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
2676b411b363SPhilipp Reisner 
267744ed167dSPhilipp Reisner 	rcu_read_lock();
267844ed167dSPhilipp Reisner 	after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
267944ed167dSPhilipp Reisner 	rcu_read_unlock();
268044ed167dSPhilipp Reisner 	switch (after_sb_2p) {
2681b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2682b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2683b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2684b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2685b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2686b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2687b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
268844ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2689b411b363SPhilipp Reisner 		dev_err(DEV, "Configuration error.\n");
2690b411b363SPhilipp Reisner 		break;
2691b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
2692b411b363SPhilipp Reisner 		rv = drbd_asb_recover_0p(mdev);
2693b411b363SPhilipp Reisner 		break;
2694b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2695b411b363SPhilipp Reisner 		break;
2696b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
2697b411b363SPhilipp Reisner 		hg = drbd_asb_recover_0p(mdev);
2698b411b363SPhilipp Reisner 		if (hg == -1) {
2699bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2700bb437946SAndreas Gruenbacher 
2701b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2702b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2703b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2704bb437946SAndreas Gruenbacher 			rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2705bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2706b411b363SPhilipp Reisner 				drbd_khelper(mdev, "pri-lost-after-sb");
2707b411b363SPhilipp Reisner 			} else {
2708b411b363SPhilipp Reisner 				dev_warn(DEV, "Successfully gave up primary role.\n");
2709b411b363SPhilipp Reisner 				rv = hg;
2710b411b363SPhilipp Reisner 			}
2711b411b363SPhilipp Reisner 		} else
2712b411b363SPhilipp Reisner 			rv = hg;
2713b411b363SPhilipp Reisner 	}
2714b411b363SPhilipp Reisner 
2715b411b363SPhilipp Reisner 	return rv;
2716b411b363SPhilipp Reisner }
2717b411b363SPhilipp Reisner 
2718b411b363SPhilipp Reisner static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2719b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
2720b411b363SPhilipp Reisner {
2721b411b363SPhilipp Reisner 	if (!uuid) {
2722b411b363SPhilipp Reisner 		dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2723b411b363SPhilipp Reisner 		return;
2724b411b363SPhilipp Reisner 	}
2725b411b363SPhilipp Reisner 	dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2726b411b363SPhilipp Reisner 	     text,
2727b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
2728b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
2729b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
2730b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
2731b411b363SPhilipp Reisner 	     (unsigned long long)bits,
2732b411b363SPhilipp Reisner 	     (unsigned long long)flags);
2733b411b363SPhilipp Reisner }
2734b411b363SPhilipp Reisner 
2735b411b363SPhilipp Reisner /*
2736b411b363SPhilipp Reisner   100	after split brain try auto recover
2737b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
2738b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
2739b411b363SPhilipp Reisner     0	no Sync
2740b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
2741b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
2742b411b363SPhilipp Reisner  -100	after split brain, disconnect
2743b411b363SPhilipp Reisner -1000	unrelated data
27444a23f264SPhilipp Reisner -1091   requires proto 91
27454a23f264SPhilipp Reisner -1096   requires proto 96
2746b411b363SPhilipp Reisner  */
2747b411b363SPhilipp Reisner static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2748b411b363SPhilipp Reisner {
2749b411b363SPhilipp Reisner 	u64 self, peer;
2750b411b363SPhilipp Reisner 	int i, j;
2751b411b363SPhilipp Reisner 
2752b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2753b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2754b411b363SPhilipp Reisner 
2755b411b363SPhilipp Reisner 	*rule_nr = 10;
2756b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2757b411b363SPhilipp Reisner 		return 0;
2758b411b363SPhilipp Reisner 
2759b411b363SPhilipp Reisner 	*rule_nr = 20;
2760b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2761b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
2762b411b363SPhilipp Reisner 		return -2;
2763b411b363SPhilipp Reisner 
2764b411b363SPhilipp Reisner 	*rule_nr = 30;
2765b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
2766b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
2767b411b363SPhilipp Reisner 		return 2;
2768b411b363SPhilipp Reisner 
2769b411b363SPhilipp Reisner 	if (self == peer) {
2770b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
2771b411b363SPhilipp Reisner 
2772b411b363SPhilipp Reisner 		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2773b411b363SPhilipp Reisner 
277431890f4aSPhilipp Reisner 			if (mdev->tconn->agreed_pro_version < 91)
27754a23f264SPhilipp Reisner 				return -1091;
2776b411b363SPhilipp Reisner 
2777b411b363SPhilipp Reisner 			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2778b411b363SPhilipp Reisner 			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2779b411b363SPhilipp Reisner 				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
27809f2247bbSPhilipp Reisner 				drbd_uuid_move_history(mdev);
27819f2247bbSPhilipp Reisner 				mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
27829f2247bbSPhilipp Reisner 				mdev->ldev->md.uuid[UI_BITMAP] = 0;
2783b411b363SPhilipp Reisner 
2784b411b363SPhilipp Reisner 				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2785b411b363SPhilipp Reisner 					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2786b411b363SPhilipp Reisner 				*rule_nr = 34;
2787b411b363SPhilipp Reisner 			} else {
2788b411b363SPhilipp Reisner 				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2789b411b363SPhilipp Reisner 				*rule_nr = 36;
2790b411b363SPhilipp Reisner 			}
2791b411b363SPhilipp Reisner 
2792b411b363SPhilipp Reisner 			return 1;
2793b411b363SPhilipp Reisner 		}
2794b411b363SPhilipp Reisner 
2795b411b363SPhilipp Reisner 		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2796b411b363SPhilipp Reisner 
279731890f4aSPhilipp Reisner 			if (mdev->tconn->agreed_pro_version < 91)
27984a23f264SPhilipp Reisner 				return -1091;
2799b411b363SPhilipp Reisner 
2800b411b363SPhilipp Reisner 			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2801b411b363SPhilipp Reisner 			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2802b411b363SPhilipp Reisner 				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2803b411b363SPhilipp Reisner 
2804b411b363SPhilipp Reisner 				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2805b411b363SPhilipp Reisner 				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2806b411b363SPhilipp Reisner 				mdev->p_uuid[UI_BITMAP] = 0UL;
2807b411b363SPhilipp Reisner 
2808b411b363SPhilipp Reisner 				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2809b411b363SPhilipp Reisner 				*rule_nr = 35;
2810b411b363SPhilipp Reisner 			} else {
2811b411b363SPhilipp Reisner 				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2812b411b363SPhilipp Reisner 				*rule_nr = 37;
2813b411b363SPhilipp Reisner 			}
2814b411b363SPhilipp Reisner 
2815b411b363SPhilipp Reisner 			return -1;
2816b411b363SPhilipp Reisner 		}
2817b411b363SPhilipp Reisner 
2818b411b363SPhilipp Reisner 		/* Common power [off|failure] */
2819b411b363SPhilipp Reisner 		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2820b411b363SPhilipp Reisner 			(mdev->p_uuid[UI_FLAGS] & 2);
2821b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
2822b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
2823b411b363SPhilipp Reisner 		*rule_nr = 40;
2824b411b363SPhilipp Reisner 
2825b411b363SPhilipp Reisner 		switch (rct) {
2826b411b363SPhilipp Reisner 		case 0: /* !self_pri && !peer_pri */ return 0;
2827b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
2828b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
2829b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
2830427c0434SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
2831b411b363SPhilipp Reisner 			return dc ? -1 : 1;
2832b411b363SPhilipp Reisner 		}
2833b411b363SPhilipp Reisner 	}
2834b411b363SPhilipp Reisner 
2835b411b363SPhilipp Reisner 	*rule_nr = 50;
2836b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2837b411b363SPhilipp Reisner 	if (self == peer)
2838b411b363SPhilipp Reisner 		return -1;
2839b411b363SPhilipp Reisner 
2840b411b363SPhilipp Reisner 	*rule_nr = 51;
2841b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2842b411b363SPhilipp Reisner 	if (self == peer) {
284331890f4aSPhilipp Reisner 		if (mdev->tconn->agreed_pro_version < 96 ?
28444a23f264SPhilipp Reisner 		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
28454a23f264SPhilipp Reisner 		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
28464a23f264SPhilipp Reisner 		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
2847b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
2848b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
2849b411b363SPhilipp Reisner 
285031890f4aSPhilipp Reisner 			if (mdev->tconn->agreed_pro_version < 91)
28514a23f264SPhilipp Reisner 				return -1091;
2852b411b363SPhilipp Reisner 
2853b411b363SPhilipp Reisner 			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2854b411b363SPhilipp Reisner 			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
28554a23f264SPhilipp Reisner 
285692b4ca29SLars Ellenberg 			dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
28574a23f264SPhilipp Reisner 			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
28584a23f264SPhilipp Reisner 
2859b411b363SPhilipp Reisner 			return -1;
2860b411b363SPhilipp Reisner 		}
2861b411b363SPhilipp Reisner 	}
2862b411b363SPhilipp Reisner 
2863b411b363SPhilipp Reisner 	*rule_nr = 60;
2864b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2865b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2866b411b363SPhilipp Reisner 		peer = mdev->p_uuid[i] & ~((u64)1);
2867b411b363SPhilipp Reisner 		if (self == peer)
2868b411b363SPhilipp Reisner 			return -2;
2869b411b363SPhilipp Reisner 	}
2870b411b363SPhilipp Reisner 
2871b411b363SPhilipp Reisner 	*rule_nr = 70;
2872b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2873b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2874b411b363SPhilipp Reisner 	if (self == peer)
2875b411b363SPhilipp Reisner 		return 1;
2876b411b363SPhilipp Reisner 
2877b411b363SPhilipp Reisner 	*rule_nr = 71;
2878b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2879b411b363SPhilipp Reisner 	if (self == peer) {
288031890f4aSPhilipp Reisner 		if (mdev->tconn->agreed_pro_version < 96 ?
28814a23f264SPhilipp Reisner 		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
28824a23f264SPhilipp Reisner 		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
28834a23f264SPhilipp Reisner 		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
2884b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
2885b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
2886b411b363SPhilipp Reisner 
288731890f4aSPhilipp Reisner 			if (mdev->tconn->agreed_pro_version < 91)
28884a23f264SPhilipp Reisner 				return -1091;
2889b411b363SPhilipp Reisner 
28909f2247bbSPhilipp Reisner 			__drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
28919f2247bbSPhilipp Reisner 			__drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2892b411b363SPhilipp Reisner 
28934a23f264SPhilipp Reisner 			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
2894b411b363SPhilipp Reisner 			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2895b411b363SPhilipp Reisner 				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2896b411b363SPhilipp Reisner 
2897b411b363SPhilipp Reisner 			return 1;
2898b411b363SPhilipp Reisner 		}
2899b411b363SPhilipp Reisner 	}
2900b411b363SPhilipp Reisner 
2901b411b363SPhilipp Reisner 
2902b411b363SPhilipp Reisner 	*rule_nr = 80;
2903d8c2a36bSPhilipp Reisner 	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2904b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2905b411b363SPhilipp Reisner 		self = mdev->ldev->md.uuid[i] & ~((u64)1);
2906b411b363SPhilipp Reisner 		if (self == peer)
2907b411b363SPhilipp Reisner 			return 2;
2908b411b363SPhilipp Reisner 	}
2909b411b363SPhilipp Reisner 
2910b411b363SPhilipp Reisner 	*rule_nr = 90;
2911b411b363SPhilipp Reisner 	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2912b411b363SPhilipp Reisner 	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2913b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
2914b411b363SPhilipp Reisner 		return 100;
2915b411b363SPhilipp Reisner 
2916b411b363SPhilipp Reisner 	*rule_nr = 100;
2917b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2918b411b363SPhilipp Reisner 		self = mdev->ldev->md.uuid[i] & ~((u64)1);
2919b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2920b411b363SPhilipp Reisner 			peer = mdev->p_uuid[j] & ~((u64)1);
2921b411b363SPhilipp Reisner 			if (self == peer)
2922b411b363SPhilipp Reisner 				return -100;
2923b411b363SPhilipp Reisner 		}
2924b411b363SPhilipp Reisner 	}
2925b411b363SPhilipp Reisner 
2926b411b363SPhilipp Reisner 	return -1000;
2927b411b363SPhilipp Reisner }
2928b411b363SPhilipp Reisner 
2929b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
2930b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
2931b411b363SPhilipp Reisner  */
2932b411b363SPhilipp Reisner static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2933b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
2934b411b363SPhilipp Reisner {
2935b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
2936b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
293744ed167dSPhilipp Reisner 	struct net_conf *nc;
29386dff2902SAndreas Gruenbacher 	int hg, rule_nr, rr_conflict, tentative;
2939b411b363SPhilipp Reisner 
2940b411b363SPhilipp Reisner 	mydisk = mdev->state.disk;
2941b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
2942b411b363SPhilipp Reisner 		mydisk = mdev->new_state_tmp.disk;
2943b411b363SPhilipp Reisner 
2944b411b363SPhilipp Reisner 	dev_info(DEV, "drbd_sync_handshake:\n");
29459f2247bbSPhilipp Reisner 
29469f2247bbSPhilipp Reisner 	spin_lock_irq(&mdev->ldev->md.uuid_lock);
2947b411b363SPhilipp Reisner 	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2948b411b363SPhilipp Reisner 	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2949b411b363SPhilipp Reisner 		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2950b411b363SPhilipp Reisner 
2951b411b363SPhilipp Reisner 	hg = drbd_uuid_compare(mdev, &rule_nr);
29529f2247bbSPhilipp Reisner 	spin_unlock_irq(&mdev->ldev->md.uuid_lock);
2953b411b363SPhilipp Reisner 
2954b411b363SPhilipp Reisner 	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2955b411b363SPhilipp Reisner 
2956b411b363SPhilipp Reisner 	if (hg == -1000) {
2957b411b363SPhilipp Reisner 		dev_alert(DEV, "Unrelated data, aborting!\n");
2958b411b363SPhilipp Reisner 		return C_MASK;
2959b411b363SPhilipp Reisner 	}
29604a23f264SPhilipp Reisner 	if (hg < -1000) {
29614a23f264SPhilipp Reisner 		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
2962b411b363SPhilipp Reisner 		return C_MASK;
2963b411b363SPhilipp Reisner 	}
2964b411b363SPhilipp Reisner 
2965b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2966b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
2967b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
2968b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
2969b411b363SPhilipp Reisner 		if (f)
2970b411b363SPhilipp Reisner 			hg = hg*2;
2971b411b363SPhilipp Reisner 		dev_info(DEV, "Becoming sync %s due to disk states.\n",
2972b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
2973b411b363SPhilipp Reisner 	}
2974b411b363SPhilipp Reisner 
29753a11a487SAdam Gandelman 	if (abs(hg) == 100)
29763a11a487SAdam Gandelman 		drbd_khelper(mdev, "initial-split-brain");
29773a11a487SAdam Gandelman 
297844ed167dSPhilipp Reisner 	rcu_read_lock();
297944ed167dSPhilipp Reisner 	nc = rcu_dereference(mdev->tconn->net_conf);
298044ed167dSPhilipp Reisner 
298144ed167dSPhilipp Reisner 	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
2982b411b363SPhilipp Reisner 		int pcount = (mdev->state.role == R_PRIMARY)
2983b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
2984b411b363SPhilipp Reisner 		int forced = (hg == -100);
2985b411b363SPhilipp Reisner 
2986b411b363SPhilipp Reisner 		switch (pcount) {
2987b411b363SPhilipp Reisner 		case 0:
2988b411b363SPhilipp Reisner 			hg = drbd_asb_recover_0p(mdev);
2989b411b363SPhilipp Reisner 			break;
2990b411b363SPhilipp Reisner 		case 1:
2991b411b363SPhilipp Reisner 			hg = drbd_asb_recover_1p(mdev);
2992b411b363SPhilipp Reisner 			break;
2993b411b363SPhilipp Reisner 		case 2:
2994b411b363SPhilipp Reisner 			hg = drbd_asb_recover_2p(mdev);
2995b411b363SPhilipp Reisner 			break;
2996b411b363SPhilipp Reisner 		}
2997b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
2998b411b363SPhilipp Reisner 			dev_warn(DEV, "Split-Brain detected, %d primaries, "
2999b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3000b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3001b411b363SPhilipp Reisner 			if (forced) {
3002b411b363SPhilipp Reisner 				dev_warn(DEV, "Doing a full sync, since"
3003b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3004b411b363SPhilipp Reisner 				hg = hg*2;
3005b411b363SPhilipp Reisner 			}
3006b411b363SPhilipp Reisner 		}
3007b411b363SPhilipp Reisner 	}
3008b411b363SPhilipp Reisner 
3009b411b363SPhilipp Reisner 	if (hg == -100) {
301008b165baSPhilipp Reisner 		if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
3011b411b363SPhilipp Reisner 			hg = -1;
301208b165baSPhilipp Reisner 		if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
3013b411b363SPhilipp Reisner 			hg = 1;
3014b411b363SPhilipp Reisner 
3015b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3016b411b363SPhilipp Reisner 			dev_warn(DEV, "Split-Brain detected, manually solved. "
3017b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3018b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3019b411b363SPhilipp Reisner 	}
302044ed167dSPhilipp Reisner 	rr_conflict = nc->rr_conflict;
30216dff2902SAndreas Gruenbacher 	tentative = nc->tentative;
302244ed167dSPhilipp Reisner 	rcu_read_unlock();
3023b411b363SPhilipp Reisner 
3024b411b363SPhilipp Reisner 	if (hg == -100) {
3025580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3026580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3027580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3028580b9767SLars Ellenberg 		 * to that disk, in a way... */
30293a11a487SAdam Gandelman 		dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
3030b411b363SPhilipp Reisner 		drbd_khelper(mdev, "split-brain");
3031b411b363SPhilipp Reisner 		return C_MASK;
3032b411b363SPhilipp Reisner 	}
3033b411b363SPhilipp Reisner 
3034b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3035b411b363SPhilipp Reisner 		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
3036b411b363SPhilipp Reisner 		return C_MASK;
3037b411b363SPhilipp Reisner 	}
3038b411b363SPhilipp Reisner 
3039b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3040b411b363SPhilipp Reisner 	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
304144ed167dSPhilipp Reisner 		switch (rr_conflict) {
3042b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3043b411b363SPhilipp Reisner 			drbd_khelper(mdev, "pri-lost");
3044b411b363SPhilipp Reisner 			/* fall through */
3045b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3046b411b363SPhilipp Reisner 			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3047b411b363SPhilipp Reisner 			return C_MASK;
3048b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
3049b411b363SPhilipp Reisner 			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3050b411b363SPhilipp Reisner 			     "assumption\n");
3051b411b363SPhilipp Reisner 		}
3052b411b363SPhilipp Reisner 	}
3053b411b363SPhilipp Reisner 
30546dff2902SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
3055cf14c2e9SPhilipp Reisner 		if (hg == 0)
3056cf14c2e9SPhilipp Reisner 			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3057cf14c2e9SPhilipp Reisner 		else
3058cf14c2e9SPhilipp Reisner 			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3059cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3060cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3061cf14c2e9SPhilipp Reisner 		return C_MASK;
3062cf14c2e9SPhilipp Reisner 	}
3063cf14c2e9SPhilipp Reisner 
3064b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3065b411b363SPhilipp Reisner 		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
306620ceb2b2SLars Ellenberg 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
306720ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3068b411b363SPhilipp Reisner 			return C_MASK;
3069b411b363SPhilipp Reisner 	}
3070b411b363SPhilipp Reisner 
3071b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3072b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3073b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3074b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3075b411b363SPhilipp Reisner 	} else {
3076b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3077b411b363SPhilipp Reisner 		if (drbd_bm_total_weight(mdev)) {
3078b411b363SPhilipp Reisner 			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3079b411b363SPhilipp Reisner 			     drbd_bm_total_weight(mdev));
3080b411b363SPhilipp Reisner 		}
3081b411b363SPhilipp Reisner 	}
3082b411b363SPhilipp Reisner 
3083b411b363SPhilipp Reisner 	return rv;
3084b411b363SPhilipp Reisner }
3085b411b363SPhilipp Reisner 
3086f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3087b411b363SPhilipp Reisner {
3088b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3089f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3090f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3091b411b363SPhilipp Reisner 
3092b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3093f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3094f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3095b411b363SPhilipp Reisner 
3096b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3097f179d76dSPhilipp Reisner 	return peer;
3098b411b363SPhilipp Reisner }
3099b411b363SPhilipp Reisner 
/*
 * receive_protocol() - process a P_PROTOCOL or P_PROTOCOL_UPDATE packet
 *
 * The peer sends its connection-wide configuration.  For an initial
 * P_PROTOCOL we verify that all settings which must agree on both sides
 * (wire protocol, after-split-brain policies, allow-two-primaries,
 * discard-my-data, data-integrity-alg) match our own net_conf and
 * disconnect on any mismatch.  In both cases the received values (and a
 * freshly allocated peer integrity transform) are then installed into a
 * new net_conf, published via RCU.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* fixed part of the packet; all fields arrive in network byte order */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* since protocol version 87, the integrity algorithm name follows
	 * the fixed header as variable-length payload */
	if (tconn->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(tconn, integrity_alg, pi->size);
		if (err)
			return err;
		/* force NUL termination, in case the peer did not */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* For the initial P_PROTOCOL (not a later update), cross-check the
	 * peer's settings against our current net_conf. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &tconn->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &tconn->flags);

		rcu_read_lock();
		nc = rcu_dereference(tconn->net_conf);

		if (p_proto != nc->wire_protocol) {
			conn_err(tconn, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* the peer's after-sb policies are from its point of view;
		 * convert_after_sb() mirrors DISCARD_LOCAL/DISCARD_REMOTE */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* at most one side may have discard-my-data set */
		if (p_discard_my_data && nc->discard_my_data) {
			conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			conn_err(tconn, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* digest buffers: one for received digests, one for local
		 * verification */
		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		conn_err(tconn, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Publish the updated configuration.  NOTE(review): data.mutex is
	 * presumably held to keep the sender quiescent while the integrity
	 * tfm is swapped below — confirm against the send path. */
	mutex_lock(&tconn->data.mutex);
	mutex_lock(&tconn->conf_update);
	old_net_conf = tconn->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(tconn->net_conf, new_net_conf);
	mutex_unlock(&tconn->conf_update);
	mutex_unlock(&tconn->data.mutex);

	/* replace the peer integrity transform and its digest buffers */
	crypto_free_hash(tconn->peer_integrity_tfm);
	kfree(tconn->int_dig_in);
	kfree(tconn->int_dig_vv);
	tconn->peer_integrity_tfm = peer_integrity_tfm;
	tconn->int_dig_in = int_dig_in;
	tconn->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		conn_info(tconn, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* wait for all RCU readers of old_net_conf before freeing it */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* release whatever was allocated before the failure */
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3249b411b363SPhilipp Reisner 
3250b411b363SPhilipp Reisner /* helper function
3251b411b363SPhilipp Reisner  * input: alg name, feature name
3252b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3253b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3254b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
3255b411b363SPhilipp Reisner struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3256b411b363SPhilipp Reisner 		const char *alg, const char *name)
3257b411b363SPhilipp Reisner {
3258b411b363SPhilipp Reisner 	struct crypto_hash *tfm;
3259b411b363SPhilipp Reisner 
3260b411b363SPhilipp Reisner 	if (!alg[0])
3261b411b363SPhilipp Reisner 		return NULL;
3262b411b363SPhilipp Reisner 
3263b411b363SPhilipp Reisner 	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3264b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3265b411b363SPhilipp Reisner 		dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3266b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3267b411b363SPhilipp Reisner 		return tfm;
3268b411b363SPhilipp Reisner 	}
3269b411b363SPhilipp Reisner 	return tfm;
3270b411b363SPhilipp Reisner }
3271b411b363SPhilipp Reisner 
32724a76b161SAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
3273b411b363SPhilipp Reisner {
32744a76b161SAndreas Gruenbacher 	void *buffer = tconn->data.rbuf;
32754a76b161SAndreas Gruenbacher 	int size = pi->size;
32764a76b161SAndreas Gruenbacher 
32774a76b161SAndreas Gruenbacher 	while (size) {
32784a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
32794a76b161SAndreas Gruenbacher 		s = drbd_recv(tconn, buffer, s);
32804a76b161SAndreas Gruenbacher 		if (s <= 0) {
32814a76b161SAndreas Gruenbacher 			if (s < 0)
32824a76b161SAndreas Gruenbacher 				return s;
32834a76b161SAndreas Gruenbacher 			break;
32844a76b161SAndreas Gruenbacher 		}
32854a76b161SAndreas Gruenbacher 		size -= s;
32864a76b161SAndreas Gruenbacher 	}
32874a76b161SAndreas Gruenbacher 	if (size)
32884a76b161SAndreas Gruenbacher 		return -EIO;
32894a76b161SAndreas Gruenbacher 	return 0;
32904a76b161SAndreas Gruenbacher }
32914a76b161SAndreas Gruenbacher 
32924a76b161SAndreas Gruenbacher /*
32934a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
32944a76b161SAndreas Gruenbacher  *
32954a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
32964a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
32974a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
32984a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
32994a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
33004a76b161SAndreas Gruenbacher  *
33014a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
33024a76b161SAndreas Gruenbacher  */
33034a76b161SAndreas Gruenbacher static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
33044a76b161SAndreas Gruenbacher {
33052fcb8f30SAndreas Gruenbacher 	conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
33062fcb8f30SAndreas Gruenbacher 		  cmdname(pi->cmd), pi->vnr);
33074a76b161SAndreas Gruenbacher 	return ignore_remaining_packet(tconn, pi);
33084a76b161SAndreas Gruenbacher }
33094a76b161SAndreas Gruenbacher 
33104a76b161SAndreas Gruenbacher static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
33114a76b161SAndreas Gruenbacher {
33124a76b161SAndreas Gruenbacher 	struct drbd_conf *mdev;
3313e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3314b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
3315b411b363SPhilipp Reisner 	struct crypto_hash *verify_tfm = NULL;
3316b411b363SPhilipp Reisner 	struct crypto_hash *csums_tfm = NULL;
33172ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3318813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
33194a76b161SAndreas Gruenbacher 	const int apv = tconn->agreed_pro_version;
3320813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
3321778f271dSPhilipp Reisner 	int fifo_size = 0;
332282bc0194SAndreas Gruenbacher 	int err;
3323b411b363SPhilipp Reisner 
33244a76b161SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
33254a76b161SAndreas Gruenbacher 	if (!mdev)
33264a76b161SAndreas Gruenbacher 		return config_unknown_volume(tconn, pi);
3327b411b363SPhilipp Reisner 
3328b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3329b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3330b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
33318e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
33328e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3333b411b363SPhilipp Reisner 
3334e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3335b411b363SPhilipp Reisner 		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3336e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
333782bc0194SAndreas Gruenbacher 		return -EIO;
3338b411b363SPhilipp Reisner 	}
3339b411b363SPhilipp Reisner 
3340b411b363SPhilipp Reisner 	if (apv <= 88) {
3341e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3342e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
33438e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3344e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3345e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
3346b411b363SPhilipp Reisner 		D_ASSERT(data_size == 0);
33478e26f9ccSPhilipp Reisner 	} else {
3348e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3349e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
3350b411b363SPhilipp Reisner 		D_ASSERT(data_size == 0);
3351b411b363SPhilipp Reisner 	}
3352b411b363SPhilipp Reisner 
3353b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3354e658983aSAndreas Gruenbacher 	p = pi->data;
3355b411b363SPhilipp Reisner 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3356b411b363SPhilipp Reisner 
3357e658983aSAndreas Gruenbacher 	err = drbd_recv_all(mdev->tconn, p, header_size);
335882bc0194SAndreas Gruenbacher 	if (err)
335982bc0194SAndreas Gruenbacher 		return err;
3360b411b363SPhilipp Reisner 
3361813472ceSPhilipp Reisner 	mutex_lock(&mdev->tconn->conf_update);
3362813472ceSPhilipp Reisner 	old_net_conf = mdev->tconn->net_conf;
3363813472ceSPhilipp Reisner 	if (get_ldev(mdev)) {
3364daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3365daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3366813472ceSPhilipp Reisner 			put_ldev(mdev);
3367813472ceSPhilipp Reisner 			mutex_unlock(&mdev->tconn->conf_update);
3368daeda1ccSPhilipp Reisner 			dev_err(DEV, "Allocation of new disk_conf failed\n");
3369daeda1ccSPhilipp Reisner 			return -ENOMEM;
3370f399002eSLars Ellenberg 		}
3371b411b363SPhilipp Reisner 
3372daeda1ccSPhilipp Reisner 		old_disk_conf = mdev->ldev->disk_conf;
3373daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3374daeda1ccSPhilipp Reisner 
33756394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3376813472ceSPhilipp Reisner 	}
3377b411b363SPhilipp Reisner 
3378b411b363SPhilipp Reisner 	if (apv >= 88) {
3379b411b363SPhilipp Reisner 		if (apv == 88) {
33805de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
33815de73827SPhilipp Reisner 				dev_err(DEV, "verify-alg of wrong size, "
33825de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3383b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3384813472ceSPhilipp Reisner 				err = -EIO;
3385813472ceSPhilipp Reisner 				goto reconnect;
3386b411b363SPhilipp Reisner 			}
3387b411b363SPhilipp Reisner 
338882bc0194SAndreas Gruenbacher 			err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
3389813472ceSPhilipp Reisner 			if (err)
3390813472ceSPhilipp Reisner 				goto reconnect;
3391b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3392b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
3393b411b363SPhilipp Reisner 			D_ASSERT(p->verify_alg[data_size-1] == 0);
3394b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3395b411b363SPhilipp Reisner 
3396b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3397b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3398b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
3399b411b363SPhilipp Reisner 			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3400b411b363SPhilipp Reisner 			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3401b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3402b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3403b411b363SPhilipp Reisner 		}
3404b411b363SPhilipp Reisner 
34052ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3406b411b363SPhilipp Reisner 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3407b411b363SPhilipp Reisner 				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
34082ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3409b411b363SPhilipp Reisner 				goto disconnect;
3410b411b363SPhilipp Reisner 			}
3411b411b363SPhilipp Reisner 			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3412b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3413b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3414b411b363SPhilipp Reisner 				verify_tfm = NULL;
3415b411b363SPhilipp Reisner 				goto disconnect;
3416b411b363SPhilipp Reisner 			}
3417b411b363SPhilipp Reisner 		}
3418b411b363SPhilipp Reisner 
34192ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3420b411b363SPhilipp Reisner 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3421b411b363SPhilipp Reisner 				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
34222ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3423b411b363SPhilipp Reisner 				goto disconnect;
3424b411b363SPhilipp Reisner 			}
3425b411b363SPhilipp Reisner 			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3426b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3427b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3428b411b363SPhilipp Reisner 				csums_tfm = NULL;
3429b411b363SPhilipp Reisner 				goto disconnect;
3430b411b363SPhilipp Reisner 			}
3431b411b363SPhilipp Reisner 		}
3432b411b363SPhilipp Reisner 
3433813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3434daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3435daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3436daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3437daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3438778f271dSPhilipp Reisner 
3439daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
34409958c857SPhilipp Reisner 			if (fifo_size != mdev->rs_plan_s->size) {
3441813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3442813472ceSPhilipp Reisner 				if (!new_plan) {
3443778f271dSPhilipp Reisner 					dev_err(DEV, "kmalloc of fifo_buffer failed");
3444f399002eSLars Ellenberg 					put_ldev(mdev);
3445778f271dSPhilipp Reisner 					goto disconnect;
3446778f271dSPhilipp Reisner 				}
3447778f271dSPhilipp Reisner 			}
34488e26f9ccSPhilipp Reisner 		}
3449b411b363SPhilipp Reisner 
345091fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
34512ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
34522ec91e0eSPhilipp Reisner 			if (!new_net_conf) {
345391fd4dadSPhilipp Reisner 				dev_err(DEV, "Allocation of new net_conf failed\n");
345491fd4dadSPhilipp Reisner 				goto disconnect;
345591fd4dadSPhilipp Reisner 			}
345691fd4dadSPhilipp Reisner 
34572ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
345891fd4dadSPhilipp Reisner 
3459b411b363SPhilipp Reisner 			if (verify_tfm) {
34602ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
34612ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3462f399002eSLars Ellenberg 				crypto_free_hash(mdev->tconn->verify_tfm);
3463f399002eSLars Ellenberg 				mdev->tconn->verify_tfm = verify_tfm;
3464b411b363SPhilipp Reisner 				dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3465b411b363SPhilipp Reisner 			}
3466b411b363SPhilipp Reisner 			if (csums_tfm) {
34672ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
34682ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3469f399002eSLars Ellenberg 				crypto_free_hash(mdev->tconn->csums_tfm);
3470f399002eSLars Ellenberg 				mdev->tconn->csums_tfm = csums_tfm;
3471b411b363SPhilipp Reisner 				dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3472b411b363SPhilipp Reisner 			}
34732ec91e0eSPhilipp Reisner 			rcu_assign_pointer(tconn->net_conf, new_net_conf);
3474778f271dSPhilipp Reisner 		}
3475b411b363SPhilipp Reisner 	}
3476b411b363SPhilipp Reisner 
3477813472ceSPhilipp Reisner 	if (new_disk_conf) {
3478daeda1ccSPhilipp Reisner 		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3479813472ceSPhilipp Reisner 		put_ldev(mdev);
3480b411b363SPhilipp Reisner 	}
3481813472ceSPhilipp Reisner 
3482813472ceSPhilipp Reisner 	if (new_plan) {
3483813472ceSPhilipp Reisner 		old_plan = mdev->rs_plan_s;
3484813472ceSPhilipp Reisner 		rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3485813472ceSPhilipp Reisner 	}
3486daeda1ccSPhilipp Reisner 
3487daeda1ccSPhilipp Reisner 	mutex_unlock(&mdev->tconn->conf_update);
3488daeda1ccSPhilipp Reisner 	synchronize_rcu();
3489daeda1ccSPhilipp Reisner 	if (new_net_conf)
3490daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
3491daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
3492813472ceSPhilipp Reisner 	kfree(old_plan);
3493daeda1ccSPhilipp Reisner 
349482bc0194SAndreas Gruenbacher 	return 0;
3495b411b363SPhilipp Reisner 
3496813472ceSPhilipp Reisner reconnect:
3497813472ceSPhilipp Reisner 	if (new_disk_conf) {
3498813472ceSPhilipp Reisner 		put_ldev(mdev);
3499813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3500813472ceSPhilipp Reisner 	}
3501813472ceSPhilipp Reisner 	mutex_unlock(&mdev->tconn->conf_update);
3502813472ceSPhilipp Reisner 	return -EIO;
3503813472ceSPhilipp Reisner 
3504b411b363SPhilipp Reisner disconnect:
3505813472ceSPhilipp Reisner 	kfree(new_plan);
3506813472ceSPhilipp Reisner 	if (new_disk_conf) {
3507813472ceSPhilipp Reisner 		put_ldev(mdev);
3508813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3509813472ceSPhilipp Reisner 	}
3510a0095508SPhilipp Reisner 	mutex_unlock(&mdev->tconn->conf_update);
3511b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
3512b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
3513b411b363SPhilipp Reisner 	crypto_free_hash(csums_tfm);
3514b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
3515b411b363SPhilipp Reisner 	crypto_free_hash(verify_tfm);
351638fa9988SPhilipp Reisner 	conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
351782bc0194SAndreas Gruenbacher 	return -EIO;
3518b411b363SPhilipp Reisner }
3519b411b363SPhilipp Reisner 
3520b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
3521b411b363SPhilipp Reisner static void warn_if_differ_considerably(struct drbd_conf *mdev,
3522b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
3523b411b363SPhilipp Reisner {
3524b411b363SPhilipp Reisner 	sector_t d;
3525b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
3526b411b363SPhilipp Reisner 		return;
3527b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
3528b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
3529b411b363SPhilipp Reisner 		dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3530b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
3531b411b363SPhilipp Reisner }
3532b411b363SPhilipp Reisner 
35334a76b161SAndreas Gruenbacher static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
3534b411b363SPhilipp Reisner {
35354a76b161SAndreas Gruenbacher 	struct drbd_conf *mdev;
3536e658983aSAndreas Gruenbacher 	struct p_sizes *p = pi->data;
3537b411b363SPhilipp Reisner 	enum determine_dev_size dd = unchanged;
3538b411b363SPhilipp Reisner 	sector_t p_size, p_usize, my_usize;
3539b411b363SPhilipp Reisner 	int ldsc = 0; /* local disk size changed */
3540e89b591cSPhilipp Reisner 	enum dds_flags ddsf;
3541b411b363SPhilipp Reisner 
35424a76b161SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
35434a76b161SAndreas Gruenbacher 	if (!mdev)
35444a76b161SAndreas Gruenbacher 		return config_unknown_volume(tconn, pi);
35454a76b161SAndreas Gruenbacher 
3546b411b363SPhilipp Reisner 	p_size = be64_to_cpu(p->d_size);
3547b411b363SPhilipp Reisner 	p_usize = be64_to_cpu(p->u_size);
3548b411b363SPhilipp Reisner 
3549b411b363SPhilipp Reisner 	/* just store the peer's disk size for now.
3550b411b363SPhilipp Reisner 	 * we still need to figure out whether we accept that. */
3551b411b363SPhilipp Reisner 	mdev->p_size = p_size;
3552b411b363SPhilipp Reisner 
3553b411b363SPhilipp Reisner 	if (get_ldev(mdev)) {
3554daeda1ccSPhilipp Reisner 		rcu_read_lock();
3555daeda1ccSPhilipp Reisner 		my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3556daeda1ccSPhilipp Reisner 		rcu_read_unlock();
3557daeda1ccSPhilipp Reisner 
3558b411b363SPhilipp Reisner 		warn_if_differ_considerably(mdev, "lower level device sizes",
3559b411b363SPhilipp Reisner 			   p_size, drbd_get_max_capacity(mdev->ldev));
3560b411b363SPhilipp Reisner 		warn_if_differ_considerably(mdev, "user requested size",
3561daeda1ccSPhilipp Reisner 					    p_usize, my_usize);
3562b411b363SPhilipp Reisner 
3563b411b363SPhilipp Reisner 		/* if this is the first connect, or an otherwise expected
3564b411b363SPhilipp Reisner 		 * param exchange, choose the minimum */
3565b411b363SPhilipp Reisner 		if (mdev->state.conn == C_WF_REPORT_PARAMS)
3566daeda1ccSPhilipp Reisner 			p_usize = min_not_zero(my_usize, p_usize);
3567b411b363SPhilipp Reisner 
3568b411b363SPhilipp Reisner 		/* Never shrink a device with usable data during connect.
3569b411b363SPhilipp Reisner 		   But allow online shrinking if we are connected. */
3570ef5e44a6SPhilipp Reisner 		if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
3571b411b363SPhilipp Reisner 		    drbd_get_capacity(mdev->this_bdev) &&
3572b411b363SPhilipp Reisner 		    mdev->state.disk >= D_OUTDATED &&
3573b411b363SPhilipp Reisner 		    mdev->state.conn < C_CONNECTED) {
3574b411b363SPhilipp Reisner 			dev_err(DEV, "The peer's disk size is too small!\n");
357538fa9988SPhilipp Reisner 			conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
3576b411b363SPhilipp Reisner 			put_ldev(mdev);
357782bc0194SAndreas Gruenbacher 			return -EIO;
3578b411b363SPhilipp Reisner 		}
3579daeda1ccSPhilipp Reisner 
3580daeda1ccSPhilipp Reisner 		if (my_usize != p_usize) {
3581daeda1ccSPhilipp Reisner 			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3582daeda1ccSPhilipp Reisner 
3583daeda1ccSPhilipp Reisner 			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3584daeda1ccSPhilipp Reisner 			if (!new_disk_conf) {
3585daeda1ccSPhilipp Reisner 				dev_err(DEV, "Allocation of new disk_conf failed\n");
3586daeda1ccSPhilipp Reisner 				put_ldev(mdev);
3587daeda1ccSPhilipp Reisner 				return -ENOMEM;
3588daeda1ccSPhilipp Reisner 			}
3589daeda1ccSPhilipp Reisner 
3590daeda1ccSPhilipp Reisner 			mutex_lock(&mdev->tconn->conf_update);
3591daeda1ccSPhilipp Reisner 			old_disk_conf = mdev->ldev->disk_conf;
3592daeda1ccSPhilipp Reisner 			*new_disk_conf = *old_disk_conf;
3593daeda1ccSPhilipp Reisner 			new_disk_conf->disk_size = p_usize;
3594daeda1ccSPhilipp Reisner 
3595daeda1ccSPhilipp Reisner 			rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3596daeda1ccSPhilipp Reisner 			mutex_unlock(&mdev->tconn->conf_update);
3597daeda1ccSPhilipp Reisner 			synchronize_rcu();
3598daeda1ccSPhilipp Reisner 			kfree(old_disk_conf);
3599daeda1ccSPhilipp Reisner 
3600daeda1ccSPhilipp Reisner 			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3601daeda1ccSPhilipp Reisner 				 (unsigned long)my_usize);
3602daeda1ccSPhilipp Reisner 		}
3603daeda1ccSPhilipp Reisner 
3604b411b363SPhilipp Reisner 		put_ldev(mdev);
3605b411b363SPhilipp Reisner 	}
3606b411b363SPhilipp Reisner 
3607e89b591cSPhilipp Reisner 	ddsf = be16_to_cpu(p->dds_flags);
3608b411b363SPhilipp Reisner 	if (get_ldev(mdev)) {
360924c4830cSBart Van Assche 		dd = drbd_determine_dev_size(mdev, ddsf);
3610b411b363SPhilipp Reisner 		put_ldev(mdev);
3611b411b363SPhilipp Reisner 		if (dd == dev_size_error)
361282bc0194SAndreas Gruenbacher 			return -EIO;
3613b411b363SPhilipp Reisner 		drbd_md_sync(mdev);
3614b411b363SPhilipp Reisner 	} else {
3615b411b363SPhilipp Reisner 		/* I am diskless, need to accept the peer's size. */
3616b411b363SPhilipp Reisner 		drbd_set_my_capacity(mdev, p_size);
3617b411b363SPhilipp Reisner 	}
3618b411b363SPhilipp Reisner 
361999432fccSPhilipp Reisner 	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
362099432fccSPhilipp Reisner 	drbd_reconsider_max_bio_size(mdev);
362199432fccSPhilipp Reisner 
3622b411b363SPhilipp Reisner 	if (get_ldev(mdev)) {
3623b411b363SPhilipp Reisner 		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3624b411b363SPhilipp Reisner 			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3625b411b363SPhilipp Reisner 			ldsc = 1;
3626b411b363SPhilipp Reisner 		}
3627b411b363SPhilipp Reisner 
3628b411b363SPhilipp Reisner 		put_ldev(mdev);
3629b411b363SPhilipp Reisner 	}
3630b411b363SPhilipp Reisner 
3631b411b363SPhilipp Reisner 	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3632b411b363SPhilipp Reisner 		if (be64_to_cpu(p->c_size) !=
3633b411b363SPhilipp Reisner 		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
3634b411b363SPhilipp Reisner 			/* we have different sizes, probably peer
3635b411b363SPhilipp Reisner 			 * needs to know my new size... */
3636e89b591cSPhilipp Reisner 			drbd_send_sizes(mdev, 0, ddsf);
3637b411b363SPhilipp Reisner 		}
3638b411b363SPhilipp Reisner 		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3639b411b363SPhilipp Reisner 		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
3640b411b363SPhilipp Reisner 			if (mdev->state.pdsk >= D_INCONSISTENT &&
3641e89b591cSPhilipp Reisner 			    mdev->state.disk >= D_INCONSISTENT) {
3642e89b591cSPhilipp Reisner 				if (ddsf & DDSF_NO_RESYNC)
3643e89b591cSPhilipp Reisner 					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3644b411b363SPhilipp Reisner 				else
3645e89b591cSPhilipp Reisner 					resync_after_online_grow(mdev);
3646e89b591cSPhilipp Reisner 			} else
3647b411b363SPhilipp Reisner 				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3648b411b363SPhilipp Reisner 		}
3649b411b363SPhilipp Reisner 	}
3650b411b363SPhilipp Reisner 
365182bc0194SAndreas Gruenbacher 	return 0;
3652b411b363SPhilipp Reisner }
3653b411b363SPhilipp Reisner 
/*
 * receive_uuids() - handle a P_UUIDS packet carrying the peer's generation UUIDs
 *
 * Copies the peer's UUID set into mdev->p_uuid, refuses to (re)connect a
 * diskless primary to data whose current UUID does not match ours, and
 * handles the "skip initial sync" handshake after fresh metadata creation.
 *
 * Returns 0 on success, -EIO to force a disconnect.
 * NOTE(review): on kmalloc failure this returns false (== 0, i.e. "success");
 * the sibling receive handlers return negative errors instead -- confirm
 * this best-effort behavior is intended.
 */
static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct p_uuids *p = pi->data;
	u64 *p_uuid;
	int i, updated_uuids = 0;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return config_unknown_volume(tconn, pi);

	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		dev_err(DEV, "kmalloc of p_uuid failed\n");
		return false;
	}

	/* convert all UUID slots from network byte order */
	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
		p_uuid[i] = be64_to_cpu(p->uuid[i]);

	/* replace any previously received UUID set */
	kfree(mdev->p_uuid);
	mdev->p_uuid = p_uuid;

	/* A not-yet-connected diskless primary may only talk to a peer whose
	 * current UUID matches our exposed-data UUID, ignoring the lowest bit
	 * (presumably the "currently primary" indicator -- confirm). */
	if (mdev->state.conn < C_CONNECTED &&
	    mdev->state.disk < D_INCONSISTENT &&
	    mdev->state.role == R_PRIMARY &&
	    (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
		dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
		    (unsigned long long)mdev->ed_uuid);
		conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (get_ldev(mdev)) {
		/* Skip the initial full sync when we are connected, both
		 * protocol versions are >= 90, our current UUID is still the
		 * just-created one, and the peer requests it via bit 3 of
		 * its UI_FLAGS: clear the bitmap and go UpToDate/UpToDate. */
		int skip_initial_sync =
			mdev->state.conn == C_CONNECTED &&
			mdev->tconn->agreed_pro_version >= 90 &&
			mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
			(p_uuid[UI_FLAGS] & 8);
		if (skip_initial_sync) {
			dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
			drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
					"clear_n_write from receive_uuids",
					BM_LOCKED_TEST_ALLOWED);
			_drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
			_drbd_uuid_set(mdev, UI_BITMAP, 0);
			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			drbd_md_sync(mdev);
			updated_uuids = 1;
		}
		put_ldev(mdev);
	} else if (mdev->state.disk < D_INCONSISTENT &&
		   mdev->state.role == R_PRIMARY) {
		/* I am a diskless primary, the peer just created a new current UUID
		   for me. */
		updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
	}

	/* Before we test for the disk state, we should wait until an eventually
	   ongoing cluster wide state change is finished. That is important if
	   we are primary and are detaching from our disk. We need to see the
	   new disk state... */
	mutex_lock(mdev->state_mutex);
	mutex_unlock(mdev->state_mutex);
	if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
		updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);

	if (updated_uuids)
		drbd_print_uuids(mdev, "receiver updated UUIDs to");

	return 0;
}
3727b411b363SPhilipp Reisner 
3728b411b363SPhilipp Reisner /**
3729b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
3730b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
3731b411b363SPhilipp Reisner  */
3732b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
3733b411b363SPhilipp Reisner {
3734b411b363SPhilipp Reisner 	union drbd_state ms;
3735b411b363SPhilipp Reisner 
3736b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
3737369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
3738b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
3739b411b363SPhilipp Reisner 
3740b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3741b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3742b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3743b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
3744b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
3745b411b363SPhilipp Reisner 	};
3746b411b363SPhilipp Reisner 
3747b411b363SPhilipp Reisner 	ms.i = ps.i;
3748b411b363SPhilipp Reisner 
3749b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
3750b411b363SPhilipp Reisner 	ms.peer = ps.role;
3751b411b363SPhilipp Reisner 	ms.role = ps.peer;
3752b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
3753b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
3754b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3755b411b363SPhilipp Reisner 
3756b411b363SPhilipp Reisner 	return ms;
3757b411b363SPhilipp Reisner }
3758b411b363SPhilipp Reisner 
/*
 * receive_req_state() - handle the peer's request to change one volume's state
 *
 * The peer sends a mask/val pair describing the desired change from its own
 * point of view; convert_state() translates it into ours before the change
 * is attempted locally.  The outcome is reported back with
 * drbd_send_sr_reply().
 *
 * Returns 0 when the packet was handled, -EIO when the volume is unknown.
 */
static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the side that resolves conflicts and a local state change
	 * is already in progress, reject the peer's request as concurrent. */
	if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* translate the peer's point of view (role<->peer, disk<->pdsk) */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(mdev, rv);

	drbd_md_sync(mdev);

	return 0;
}
3789b411b363SPhilipp Reisner 
/*
 * receive_req_conn_state() - handle the peer's request to change connection state
 *
 * Connection-wide counterpart of receive_req_state(): translates the peer's
 * mask/val pair into our point of view, applies it via conn_request_state(),
 * and reports the result back with conn_send_sr_reply().
 *
 * Returns 0 (the packet is always considered handled).
 */
static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the side that resolves conflicts and a connection state
	 * change is already in progress, reject the peer's request. */
	if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
	    mutex_is_locked(&tconn->cstate_mutex)) {
		conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* translate the peer's point of view (role<->peer, disk<->pdsk) */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(tconn, rv);

	return 0;
}
3813dfafcc8aSPhilipp Reisner 
38144a76b161SAndreas Gruenbacher static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
3815b411b363SPhilipp Reisner {
38164a76b161SAndreas Gruenbacher 	struct drbd_conf *mdev;
3817e658983aSAndreas Gruenbacher 	struct p_state *p = pi->data;
38184ac4aadaSLars Ellenberg 	union drbd_state os, ns, peer_state;
3819b411b363SPhilipp Reisner 	enum drbd_disk_state real_peer_disk;
382065d922c3SPhilipp Reisner 	enum chg_state_flags cs_flags;
3821b411b363SPhilipp Reisner 	int rv;
3822b411b363SPhilipp Reisner 
38234a76b161SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
38244a76b161SAndreas Gruenbacher 	if (!mdev)
38254a76b161SAndreas Gruenbacher 		return config_unknown_volume(tconn, pi);
38264a76b161SAndreas Gruenbacher 
3827b411b363SPhilipp Reisner 	peer_state.i = be32_to_cpu(p->state);
3828b411b363SPhilipp Reisner 
3829b411b363SPhilipp Reisner 	real_peer_disk = peer_state.disk;
3830b411b363SPhilipp Reisner 	if (peer_state.disk == D_NEGOTIATING) {
3831b411b363SPhilipp Reisner 		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3832b411b363SPhilipp Reisner 		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3833b411b363SPhilipp Reisner 	}
3834b411b363SPhilipp Reisner 
383587eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
3836b411b363SPhilipp Reisner  retry:
383778bae59bSPhilipp Reisner 	os = ns = drbd_read_state(mdev);
383887eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
3839b411b363SPhilipp Reisner 
3840545752d5SLars Ellenberg 	/* If some other part of the code (asender thread, timeout)
3841545752d5SLars Ellenberg 	 * already decided to close the connection again,
3842545752d5SLars Ellenberg 	 * we must not "re-establish" it here. */
3843545752d5SLars Ellenberg 	if (os.conn <= C_TEAR_DOWN)
384458ffa580SLars Ellenberg 		return -ECONNRESET;
3845545752d5SLars Ellenberg 
384640424e4aSLars Ellenberg 	/* If this is the "end of sync" confirmation, usually the peer disk
384740424e4aSLars Ellenberg 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
384840424e4aSLars Ellenberg 	 * set) resync started in PausedSyncT, or if the timing of pause-/
384940424e4aSLars Ellenberg 	 * unpause-sync events has been "just right", the peer disk may
385040424e4aSLars Ellenberg 	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
385140424e4aSLars Ellenberg 	 */
385240424e4aSLars Ellenberg 	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
385340424e4aSLars Ellenberg 	    real_peer_disk == D_UP_TO_DATE &&
3854e9ef7bb6SLars Ellenberg 	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3855e9ef7bb6SLars Ellenberg 		/* If we are (becoming) SyncSource, but peer is still in sync
3856e9ef7bb6SLars Ellenberg 		 * preparation, ignore its uptodate-ness to avoid flapping, it
3857e9ef7bb6SLars Ellenberg 		 * will change to inconsistent once the peer reaches active
3858e9ef7bb6SLars Ellenberg 		 * syncing states.
3859e9ef7bb6SLars Ellenberg 		 * It may have changed syncer-paused flags, however, so we
3860e9ef7bb6SLars Ellenberg 		 * cannot ignore this completely. */
3861e9ef7bb6SLars Ellenberg 		if (peer_state.conn > C_CONNECTED &&
3862e9ef7bb6SLars Ellenberg 		    peer_state.conn < C_SYNC_SOURCE)
3863e9ef7bb6SLars Ellenberg 			real_peer_disk = D_INCONSISTENT;
3864e9ef7bb6SLars Ellenberg 
3865e9ef7bb6SLars Ellenberg 		/* if peer_state changes to connected at the same time,
3866e9ef7bb6SLars Ellenberg 		 * it explicitly notifies us that it finished resync.
3867e9ef7bb6SLars Ellenberg 		 * Maybe we should finish it up, too? */
3868e9ef7bb6SLars Ellenberg 		else if (os.conn >= C_SYNC_SOURCE &&
3869e9ef7bb6SLars Ellenberg 			 peer_state.conn == C_CONNECTED) {
3870e9ef7bb6SLars Ellenberg 			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3871e9ef7bb6SLars Ellenberg 				drbd_resync_finished(mdev);
387282bc0194SAndreas Gruenbacher 			return 0;
3873e9ef7bb6SLars Ellenberg 		}
3874e9ef7bb6SLars Ellenberg 	}
3875e9ef7bb6SLars Ellenberg 
387602b91b55SLars Ellenberg 	/* explicit verify finished notification, stop sector reached. */
387702b91b55SLars Ellenberg 	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
387802b91b55SLars Ellenberg 	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
387958ffa580SLars Ellenberg 		ov_out_of_sync_print(mdev);
388002b91b55SLars Ellenberg 		drbd_resync_finished(mdev);
388158ffa580SLars Ellenberg 		return 0;
388202b91b55SLars Ellenberg 	}
388302b91b55SLars Ellenberg 
3884e9ef7bb6SLars Ellenberg 	/* peer says his disk is inconsistent, while we think it is uptodate,
3885e9ef7bb6SLars Ellenberg 	 * and this happens while the peer still thinks we have a sync going on,
3886e9ef7bb6SLars Ellenberg 	 * but we think we are already done with the sync.
3887e9ef7bb6SLars Ellenberg 	 * We ignore this to avoid flapping pdsk.
3888e9ef7bb6SLars Ellenberg 	 * This should not happen, if the peer is a recent version of drbd. */
3889e9ef7bb6SLars Ellenberg 	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3890e9ef7bb6SLars Ellenberg 	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3891e9ef7bb6SLars Ellenberg 		real_peer_disk = D_UP_TO_DATE;
3892e9ef7bb6SLars Ellenberg 
38934ac4aadaSLars Ellenberg 	if (ns.conn == C_WF_REPORT_PARAMS)
38944ac4aadaSLars Ellenberg 		ns.conn = C_CONNECTED;
3895b411b363SPhilipp Reisner 
389667531718SPhilipp Reisner 	if (peer_state.conn == C_AHEAD)
389767531718SPhilipp Reisner 		ns.conn = C_BEHIND;
389867531718SPhilipp Reisner 
3899b411b363SPhilipp Reisner 	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3900b411b363SPhilipp Reisner 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
3901b411b363SPhilipp Reisner 		int cr; /* consider resync */
3902b411b363SPhilipp Reisner 
3903b411b363SPhilipp Reisner 		/* if we established a new connection */
39044ac4aadaSLars Ellenberg 		cr  = (os.conn < C_CONNECTED);
3905b411b363SPhilipp Reisner 		/* if we had an established connection
3906b411b363SPhilipp Reisner 		 * and one of the nodes newly attaches a disk */
39074ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
3908b411b363SPhilipp Reisner 		       (peer_state.disk == D_NEGOTIATING ||
39094ac4aadaSLars Ellenberg 			os.disk == D_NEGOTIATING));
3910b411b363SPhilipp Reisner 		/* if we have both been inconsistent, and the peer has been
3911b411b363SPhilipp Reisner 		 * forced to be UpToDate with --overwrite-data */
3912b411b363SPhilipp Reisner 		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3913b411b363SPhilipp Reisner 		/* if we had been plain connected, and the admin requested to
3914b411b363SPhilipp Reisner 		 * start a sync by "invalidate" or "invalidate-remote" */
39154ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
3916b411b363SPhilipp Reisner 				(peer_state.conn >= C_STARTING_SYNC_S &&
3917b411b363SPhilipp Reisner 				 peer_state.conn <= C_WF_BITMAP_T));
3918b411b363SPhilipp Reisner 
3919b411b363SPhilipp Reisner 		if (cr)
39204ac4aadaSLars Ellenberg 			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
3921b411b363SPhilipp Reisner 
3922b411b363SPhilipp Reisner 		put_ldev(mdev);
39234ac4aadaSLars Ellenberg 		if (ns.conn == C_MASK) {
39244ac4aadaSLars Ellenberg 			ns.conn = C_CONNECTED;
3925b411b363SPhilipp Reisner 			if (mdev->state.disk == D_NEGOTIATING) {
392682f59cc6SLars Ellenberg 				drbd_force_state(mdev, NS(disk, D_FAILED));
3927b411b363SPhilipp Reisner 			} else if (peer_state.disk == D_NEGOTIATING) {
3928b411b363SPhilipp Reisner 				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3929b411b363SPhilipp Reisner 				peer_state.disk = D_DISKLESS;
3930580b9767SLars Ellenberg 				real_peer_disk = D_DISKLESS;
3931b411b363SPhilipp Reisner 			} else {
39328169e41bSPhilipp Reisner 				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
393382bc0194SAndreas Gruenbacher 					return -EIO;
39344ac4aadaSLars Ellenberg 				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
393538fa9988SPhilipp Reisner 				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
393682bc0194SAndreas Gruenbacher 				return -EIO;
3937b411b363SPhilipp Reisner 			}
3938b411b363SPhilipp Reisner 		}
3939b411b363SPhilipp Reisner 	}
3940b411b363SPhilipp Reisner 
394187eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
394278bae59bSPhilipp Reisner 	if (os.i != drbd_read_state(mdev).i)
3943b411b363SPhilipp Reisner 		goto retry;
3944b411b363SPhilipp Reisner 	clear_bit(CONSIDER_RESYNC, &mdev->flags);
3945b411b363SPhilipp Reisner 	ns.peer = peer_state.role;
3946b411b363SPhilipp Reisner 	ns.pdsk = real_peer_disk;
3947b411b363SPhilipp Reisner 	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
39484ac4aadaSLars Ellenberg 	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
3949b411b363SPhilipp Reisner 		ns.disk = mdev->new_state_tmp.disk;
39504ac4aadaSLars Ellenberg 	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
39512aebfabbSPhilipp Reisner 	if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
3952481c6f50SPhilipp Reisner 	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
39538554df1cSAndreas Gruenbacher 		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
3954481c6f50SPhilipp Reisner 		   for temporal network outages! */
395587eeee41SPhilipp Reisner 		spin_unlock_irq(&mdev->tconn->req_lock);
3956481c6f50SPhilipp Reisner 		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
39572f5cdd0bSPhilipp Reisner 		tl_clear(mdev->tconn);
3958481c6f50SPhilipp Reisner 		drbd_uuid_new_current(mdev);
3959481c6f50SPhilipp Reisner 		clear_bit(NEW_CUR_UUID, &mdev->flags);
396038fa9988SPhilipp Reisner 		conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
396182bc0194SAndreas Gruenbacher 		return -EIO;
3962481c6f50SPhilipp Reisner 	}
396365d922c3SPhilipp Reisner 	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
396478bae59bSPhilipp Reisner 	ns = drbd_read_state(mdev);
396587eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
3966b411b363SPhilipp Reisner 
3967b411b363SPhilipp Reisner 	if (rv < SS_SUCCESS) {
396838fa9988SPhilipp Reisner 		conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
396982bc0194SAndreas Gruenbacher 		return -EIO;
3970b411b363SPhilipp Reisner 	}
3971b411b363SPhilipp Reisner 
39724ac4aadaSLars Ellenberg 	if (os.conn > C_WF_REPORT_PARAMS) {
39734ac4aadaSLars Ellenberg 		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
3974b411b363SPhilipp Reisner 		    peer_state.disk != D_NEGOTIATING ) {
3975b411b363SPhilipp Reisner 			/* we want resync, peer has not yet decided to sync... */
3976b411b363SPhilipp Reisner 			/* Nowadays only used when forcing a node into primary role and
3977b411b363SPhilipp Reisner 			   setting its disk to UpToDate with that */
3978b411b363SPhilipp Reisner 			drbd_send_uuids(mdev);
3979f479ea06SLars Ellenberg 			drbd_send_current_state(mdev);
3980b411b363SPhilipp Reisner 		}
3981b411b363SPhilipp Reisner 	}
3982b411b363SPhilipp Reisner 
398308b165baSPhilipp Reisner 	clear_bit(DISCARD_MY_DATA, &mdev->flags);
3984b411b363SPhilipp Reisner 
3985b411b363SPhilipp Reisner 	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3986b411b363SPhilipp Reisner 
398782bc0194SAndreas Gruenbacher 	return 0;
3988b411b363SPhilipp Reisner }
3989b411b363SPhilipp Reisner 
/* Handle a P_SYNC_UUID packet: right before a resync where we are the
 * target, the peer tells us the UUID to use.  We set it as our current
 * UUID (without rotating the old current into the history) and kick off
 * the resync as sync target. */
static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct p_rs_uuid *p = pi->data;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	/* Wait until our own state transition settled: we are about to
	 * become sync target (WFSyncUUID/Behind), or the connection or the
	 * disk is on its way down and the packet becomes irrelevant. */
	wait_event(mdev->misc_wait,
		   mdev->state.conn == C_WF_SYNC_UUID ||
		   mdev->state.conn == C_BEHIND ||
		   mdev->state.conn < C_CONNECTED ||
		   mdev->state.disk < D_NEGOTIATING);

	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);

		drbd_print_uuids(mdev, "updated sync uuid");
		drbd_start_resync(mdev, C_SYNC_TARGET);

		put_ldev(mdev);
	} else
		/* no local disk (anymore) -- nothing to apply the UUID to */
		dev_err(DEV, "Ignoring SyncUUID packet!\n");

	return 0;
}
4022b411b363SPhilipp Reisner 
/**
 * receive_bitmap_plain
 *
 * Receive one chunk of an uncompressed bitmap transfer and merge it
 * into the local bitmap at the current transfer offset tracked in @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	/* maximum payload that fits into one socket buffer */
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(mdev->tconn);
	/* expected word count: a full buffer, or whatever is left */
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	/* the sender must agree with us on the chunk size */
	if (want != size) {
		dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;	/* word_offset reached bm_words: transfer done */
	err = drbd_recv_all(mdev->tconn, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);

	/* advance the transfer context; clamp the bit offset to the
	 * bitmap size (the last word may contain padding bits) */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
4059b411b363SPhilipp Reisner 
4060a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4061a02d1240SAndreas Gruenbacher {
4062a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4063a02d1240SAndreas Gruenbacher }
4064a02d1240SAndreas Gruenbacher 
4065a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4066a02d1240SAndreas Gruenbacher {
4067a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4068a02d1240SAndreas Gruenbacher }
4069a02d1240SAndreas Gruenbacher 
4070a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4071a02d1240SAndreas Gruenbacher {
4072a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4073a02d1240SAndreas Gruenbacher }
4074a02d1240SAndreas Gruenbacher 
/**
 * recv_bm_rle_bits
 *
 * Decode one compressed bitmap payload: a bit stream of variable-length
 * encoded run lengths, describing alternating runs of clear and set
 * bits.  Only the "set" runs are applied to the local bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* window of up to 64 not-yet-consumed input bits */
	u64 rl;			/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* bitmap position of current run */
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* value of the first run: set or clear */
	int have;		/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		/* decode the next run length from the window */
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* a run must not extend past the end of the bitmap */
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		/* the decoded code must not claim more bits than we hold */
		if (have < bits) {
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded code, then refill the window */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 1: more packets expected; 0: reached the end of the bitmap */
	return (s != c->bm_bits);
}
4139b411b363SPhilipp Reisner 
41402c46407dSAndreas Gruenbacher /**
41412c46407dSAndreas Gruenbacher  * decode_bitmap_c
41422c46407dSAndreas Gruenbacher  *
41432c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
41442c46407dSAndreas Gruenbacher  * code upon failure.
41452c46407dSAndreas Gruenbacher  */
41462c46407dSAndreas Gruenbacher static int
4147b411b363SPhilipp Reisner decode_bitmap_c(struct drbd_conf *mdev,
4148b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4149c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4150c6d25cfeSPhilipp Reisner 		unsigned int len)
4151b411b363SPhilipp Reisner {
4152a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
4153e658983aSAndreas Gruenbacher 		return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
4154b411b363SPhilipp Reisner 
4155b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4156b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4157b411b363SPhilipp Reisner 	 * during all our tests. */
4158b411b363SPhilipp Reisner 
4159b411b363SPhilipp Reisner 	dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
416038fa9988SPhilipp Reisner 	conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
41612c46407dSAndreas Gruenbacher 	return -EIO;
4162b411b363SPhilipp Reisner }
4163b411b363SPhilipp Reisner 
4164b411b363SPhilipp Reisner void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4165b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4166b411b363SPhilipp Reisner {
4167b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
416850d0b1adSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(mdev->tconn);
416950d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
417050d0b1adSAndreas Gruenbacher 	unsigned int plain =
417150d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
417250d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
417350d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
417450d0b1adSAndreas Gruenbacher 	unsigned int r;
4175b411b363SPhilipp Reisner 
4176b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4177b411b363SPhilipp Reisner 	if (total == 0)
4178b411b363SPhilipp Reisner 		return;
4179b411b363SPhilipp Reisner 
4180b411b363SPhilipp Reisner 	/* don't report if not compressed */
4181b411b363SPhilipp Reisner 	if (total >= plain)
4182b411b363SPhilipp Reisner 		return;
4183b411b363SPhilipp Reisner 
4184b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4185b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4186b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4187b411b363SPhilipp Reisner 
4188b411b363SPhilipp Reisner 	if (r > 1000)
4189b411b363SPhilipp Reisner 		r = 1000;
4190b411b363SPhilipp Reisner 
4191b411b363SPhilipp Reisner 	r = 1000 - r;
4192b411b363SPhilipp Reisner 	dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4193b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4194b411b363SPhilipp Reisner 			direction,
4195b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4196b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4197b411b363SPhilipp Reisner 			total, r/10, r % 10);
4198b411b363SPhilipp Reisner }
4199b411b363SPhilipp Reisner 
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct bm_xfer_ctx c;
	int err;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* keep receiving bitmap packets (plain or compressed, both are
	 * accepted in any iteration) until the whole bitmap arrived */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			/* the payload must fit into one socket buffer ... */
			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			/* ... and must carry more than just the sub-header */
			if (pi->size <= sizeof(*p)) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(mdev->tconn, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(mdev, p, &c, pi->size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* per-type statistics, reported by INFO_bm_xfer_stats below */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;

		/* err > 0: another packet expected; 0: done; < 0: failure */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(mdev->tconn, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* we received the peer's bitmap; now send ours back */
		err = drbd_send_bitmap(mdev);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(mdev);
	if (!err && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	return err;
}
4293b411b363SPhilipp Reisner 
/* Fallback handler for known-but-ignored packet types: log it and
 * drain the payload so the data stream stays in sync. */
static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
{
	conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
		 pi->cmd, pi->size);

	return ignore_remaining_packet(tconn, pi);
}
4301b411b363SPhilipp Reisner 
/* Handler for P_UNPLUG_REMOTE packets. */
static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(tconn->data.socket);

	return 0;
}
4310b411b363SPhilipp Reisner 
43114a76b161SAndreas Gruenbacher static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
431273a01a18SPhilipp Reisner {
43134a76b161SAndreas Gruenbacher 	struct drbd_conf *mdev;
4314e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
43154a76b161SAndreas Gruenbacher 
43164a76b161SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
43174a76b161SAndreas Gruenbacher 	if (!mdev)
43184a76b161SAndreas Gruenbacher 		return -EIO;
431973a01a18SPhilipp Reisner 
4320f735e363SLars Ellenberg 	switch (mdev->state.conn) {
4321f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4322f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4323f735e363SLars Ellenberg 	case C_BEHIND:
4324f735e363SLars Ellenberg 			break;
4325f735e363SLars Ellenberg 	default:
4326f735e363SLars Ellenberg 		dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4327f735e363SLars Ellenberg 				drbd_conn_str(mdev->state.conn));
4328f735e363SLars Ellenberg 	}
4329f735e363SLars Ellenberg 
433073a01a18SPhilipp Reisner 	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
433173a01a18SPhilipp Reisner 
433282bc0194SAndreas Gruenbacher 	return 0;
433373a01a18SPhilipp Reisner }
433473a01a18SPhilipp Reisner 
/* Dispatch table entry for one packet type on the data socket. */
struct data_cmd {
	int expect_payload;	/* nonzero: packet may carry data beyond pkt_size */
	size_t pkt_size;	/* fixed (sub-)header size to read before dispatch */
	int (*fn)(struct drbd_tconn *, struct packet_info *);	/* handler */
};
4340b411b363SPhilipp Reisner 
/* One entry per packet command code (used as array index by drbdd()).
 * Entries with pkt_size == 0 leave all reading beyond the packet header
 * to the handler itself. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
};
436702918be2SPhilipp Reisner 
4368eefc2f7dSPhilipp Reisner static void drbdd(struct drbd_tconn *tconn)
4369b411b363SPhilipp Reisner {
437077351055SPhilipp Reisner 	struct packet_info pi;
437102918be2SPhilipp Reisner 	size_t shs; /* sub header size */
437282bc0194SAndreas Gruenbacher 	int err;
4373b411b363SPhilipp Reisner 
4374eefc2f7dSPhilipp Reisner 	while (get_t_state(&tconn->receiver) == RUNNING) {
4375deebe195SAndreas Gruenbacher 		struct data_cmd *cmd;
4376deebe195SAndreas Gruenbacher 
4377eefc2f7dSPhilipp Reisner 		drbd_thread_current_set_cpu(&tconn->receiver);
437869bc7bc3SAndreas Gruenbacher 		if (drbd_recv_header(tconn, &pi))
437902918be2SPhilipp Reisner 			goto err_out;
438002918be2SPhilipp Reisner 
4381deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
43824a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
43832fcb8f30SAndreas Gruenbacher 			conn_err(tconn, "Unexpected data packet %s (0x%04x)",
43842fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
438502918be2SPhilipp Reisner 			goto err_out;
43860b33a916SLars Ellenberg 		}
4387b411b363SPhilipp Reisner 
4388e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
4389e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
43902fcb8f30SAndreas Gruenbacher 			conn_err(tconn, "No payload expected %s l:%d\n",
43912fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
4392c13f7e1aSLars Ellenberg 			goto err_out;
4393c13f7e1aSLars Ellenberg 		}
4394c13f7e1aSLars Ellenberg 
4395c13f7e1aSLars Ellenberg 		if (shs) {
4396e658983aSAndreas Gruenbacher 			err = drbd_recv_all_warn(tconn, pi.data, shs);
4397a5c31904SAndreas Gruenbacher 			if (err)
439802918be2SPhilipp Reisner 				goto err_out;
4399e2857216SAndreas Gruenbacher 			pi.size -= shs;
4400b411b363SPhilipp Reisner 		}
440102918be2SPhilipp Reisner 
44024a76b161SAndreas Gruenbacher 		err = cmd->fn(tconn, &pi);
44034a76b161SAndreas Gruenbacher 		if (err) {
44049f5bdc33SAndreas Gruenbacher 			conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
44059f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
440602918be2SPhilipp Reisner 			goto err_out;
440702918be2SPhilipp Reisner 		}
440802918be2SPhilipp Reisner 	}
440982bc0194SAndreas Gruenbacher 	return;
441002918be2SPhilipp Reisner 
441102918be2SPhilipp Reisner     err_out:
4412bbeb641cSPhilipp Reisner 	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
4413b411b363SPhilipp Reisner }
4414b411b363SPhilipp Reisner 
44150e29d163SPhilipp Reisner void conn_flush_workqueue(struct drbd_tconn *tconn)
4416b411b363SPhilipp Reisner {
4417b411b363SPhilipp Reisner 	struct drbd_wq_barrier barr;
4418b411b363SPhilipp Reisner 
4419b411b363SPhilipp Reisner 	barr.w.cb = w_prev_work_done;
44200e29d163SPhilipp Reisner 	barr.w.tconn = tconn;
4421b411b363SPhilipp Reisner 	init_completion(&barr.done);
4422d5b27b01SLars Ellenberg 	drbd_queue_work(&tconn->sender_work, &barr.w);
4423b411b363SPhilipp Reisner 	wait_for_completion(&barr.done);
4424b411b363SPhilipp Reisner }
4425b411b363SPhilipp Reisner 
/* Tear down a lost connection: stop the asender, close the sockets,
 * clean up every volume, maybe try to outdate the peer, and move the
 * connection state machine to C_UNCONNECTED (or on to C_STANDALONE if
 * a disconnect was requested). */
static void conn_disconnect(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	enum drbd_conns oc;
	int vnr;

	if (tconn->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* clean up each volume; take a kref so the mdev stays valid while
	 * we temporarily drop the RCU read lock around the (possibly
	 * sleeping) per-volume cleanup */
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		kref_get(&mdev->kref);
		rcu_read_unlock();
		drbd_disconnected(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&tconn->current_epoch->list))
		conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&tconn->current_epoch->epoch_size, 0);
	tconn->send.seen_any_write_yet = false;

	conn_info(tconn, "Connection closed\n");

	/* as Primary with a peer whose disk state is unknown (or worse),
	 * try to outdate the peer asynchronously */
	if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
		conn_try_outdate_peer_async(tconn);

	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4477360cc740SPhilipp Reisner 
/*
 * drbd_disconnected() - per-volume cleanup after the connection is gone.
 *
 * Drains the in-flight peer-request lists, cancels all resync
 * bookkeeping, flushes the work queue (twice, see below), clears the
 * transfer log unless I/O is suspended, and releases pages still
 * referenced by the network stack.  Always returns 0.
 */
4478c141ebdaSPhilipp Reisner static int drbd_disconnected(struct drbd_conf *mdev)
4479360cc740SPhilipp Reisner {
4480360cc740SPhilipp Reisner 	unsigned int i;
4481b411b363SPhilipp Reisner 
448285719573SPhilipp Reisner 	/* wait for current activity to cease. */
448387eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
4484b411b363SPhilipp Reisner 	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4485b411b363SPhilipp Reisner 	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4486b411b363SPhilipp Reisner 	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
448787eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
4488b411b363SPhilipp Reisner 
4489b411b363SPhilipp Reisner 	/* We do not have data structures that would allow us to
4490b411b363SPhilipp Reisner 	 * get the rs_pending_cnt down to 0 again.
4491b411b363SPhilipp Reisner 	 *  * On C_SYNC_TARGET we do not have any data structures describing
4492b411b363SPhilipp Reisner 	 *    the pending RSDataRequest's we have sent.
4493b411b363SPhilipp Reisner 	 *  * On C_SYNC_SOURCE there is no data structure that tracks
4494b411b363SPhilipp Reisner 	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4495b411b363SPhilipp Reisner 	 *  And no, it is not the sum of the reference counts in the
4496b411b363SPhilipp Reisner 	 *  resync_LRU. The resync_LRU tracks the whole operation including
4497b411b363SPhilipp Reisner 	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
4498b411b363SPhilipp Reisner 	 *  on the fly. */
4499b411b363SPhilipp Reisner 	drbd_rs_cancel_all(mdev);
4500b411b363SPhilipp Reisner 	mdev->rs_total = 0;
4501b411b363SPhilipp Reisner 	mdev->rs_failed = 0;
4502b411b363SPhilipp Reisner 	atomic_set(&mdev->rs_pending_cnt, 0);
4503b411b363SPhilipp Reisner 	wake_up(&mdev->misc_wait);
4504b411b363SPhilipp Reisner 
	/* Stop the timer, then invoke its handler once by hand.
	 * NOTE(review): presumably this lets a pending resync step run to a
	 * clean point instead of being silently dropped — verify against
	 * resync_timer_fn(). */
4505b411b363SPhilipp Reisner 	del_timer_sync(&mdev->resync_timer);
4506b411b363SPhilipp Reisner 	resync_timer_fn((unsigned long)mdev);
4507b411b363SPhilipp Reisner 
4508b411b363SPhilipp Reisner 	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4509b411b363SPhilipp Reisner 	 * w_make_resync_request etc. which may still be on the worker queue
4510b411b363SPhilipp Reisner 	 * to be "canceled" */
4511b411b363SPhilipp Reisner 	drbd_flush_workqueue(mdev);
4512b411b363SPhilipp Reisner 
4513a990be46SAndreas Gruenbacher 	drbd_finish_peer_reqs(mdev);
4514b411b363SPhilipp Reisner 
4515d10b4ea3SPhilipp Reisner 	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4516d10b4ea3SPhilipp Reisner 	   might have issued a work again. The one before drbd_finish_peer_reqs() is
4517d10b4ea3SPhilipp Reisner 	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4518d10b4ea3SPhilipp Reisner 	drbd_flush_workqueue(mdev);
4519d10b4ea3SPhilipp Reisner 
452008332d73SLars Ellenberg 	/* need to do it again, drbd_finish_peer_reqs() may have populated it
452108332d73SLars Ellenberg 	 * again via drbd_try_clear_on_disk_bm(). */
452208332d73SLars Ellenberg 	drbd_rs_cancel_all(mdev);
4523b411b363SPhilipp Reisner 
	/* The peer's UUIDs are stale now; kfree(NULL) is a no-op. */
4524b411b363SPhilipp Reisner 	kfree(mdev->p_uuid);
4525b411b363SPhilipp Reisner 	mdev->p_uuid = NULL;
4526b411b363SPhilipp Reisner 
	/* Keep the transfer log while I/O is suspended, so requests can be
	 * resent after reconnect. */
45272aebfabbSPhilipp Reisner 	if (!drbd_suspended(mdev))
45282f5cdd0bSPhilipp Reisner 		tl_clear(mdev->tconn);
4529b411b363SPhilipp Reisner 
4530b411b363SPhilipp Reisner 	drbd_md_sync(mdev);
4531b411b363SPhilipp Reisner 
453220ceb2b2SLars Ellenberg 	/* serialize with bitmap writeout triggered by the state change,
453320ceb2b2SLars Ellenberg 	 * if any. */
453420ceb2b2SLars Ellenberg 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
453520ceb2b2SLars Ellenberg 
4536b411b363SPhilipp Reisner 	/* tcp_close and release of sendpage pages can be deferred.  I don't
4537b411b363SPhilipp Reisner 	 * want to use SO_LINGER, because apparently it can be deferred for
4538b411b363SPhilipp Reisner 	 * more than 20 seconds (longest time I checked).
4539b411b363SPhilipp Reisner 	 *
4540b411b363SPhilipp Reisner 	 * Actually we don't care for exactly when the network stack does its
4541b411b363SPhilipp Reisner 	 * put_page(), but release our reference on these pages right here.
4542b411b363SPhilipp Reisner 	 */
45437721f567SAndreas Gruenbacher 	i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
4544b411b363SPhilipp Reisner 	if (i)
4545b411b363SPhilipp Reisner 		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
4546435f0740SLars Ellenberg 	i = atomic_read(&mdev->pp_in_use_by_net);
4547435f0740SLars Ellenberg 	if (i)
4548435f0740SLars Ellenberg 		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
4549b411b363SPhilipp Reisner 	i = atomic_read(&mdev->pp_in_use);
4550b411b363SPhilipp Reisner 	if (i)
455145bb912bSLars Ellenberg 		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
4552b411b363SPhilipp Reisner 
4553b411b363SPhilipp Reisner 	D_ASSERT(list_empty(&mdev->read_ee));
4554b411b363SPhilipp Reisner 	D_ASSERT(list_empty(&mdev->active_ee));
4555b411b363SPhilipp Reisner 	D_ASSERT(list_empty(&mdev->sync_ee));
4556b411b363SPhilipp Reisner 	D_ASSERT(list_empty(&mdev->done_ee));
4557b411b363SPhilipp Reisner 
4558360cc740SPhilipp Reisner 	return 0;
4559b411b363SPhilipp Reisner }
4560b411b363SPhilipp Reisner 
4561b411b363SPhilipp Reisner /*
4562b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4563b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
4564b411b363SPhilipp Reisner  *
4565b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
4566b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
4567b411b363SPhilipp Reisner  *
4568b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
4569b411b363SPhilipp Reisner  */
45706038178eSAndreas Gruenbacher static int drbd_send_features(struct drbd_tconn *tconn)
4571b411b363SPhilipp Reisner {
45729f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
45739f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
4574b411b363SPhilipp Reisner 
45759f5bdc33SAndreas Gruenbacher 	sock = &tconn->data;
45769f5bdc33SAndreas Gruenbacher 	p = conn_prepare_command(tconn, sock);
45779f5bdc33SAndreas Gruenbacher 	if (!p)
4578e8d17b01SAndreas Gruenbacher 		return -EIO;
4579b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
4580b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4581b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
45829f5bdc33SAndreas Gruenbacher 	return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
4583b411b363SPhilipp Reisner }
4584b411b363SPhilipp Reisner 
4585b411b363SPhilipp Reisner /*
4586b411b363SPhilipp Reisner  * return values:
4587b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
4588b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
4589b411b363SPhilipp Reisner  *  -1 peer talks different language,
4590b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
4591b411b363SPhilipp Reisner  */
/*
 * drbd_do_features() - exchange P_CONNECTION_FEATURES with the peer and
 * negotiate the common protocol version (stored in agreed_pro_version).
 * Return values are documented in the comment above this function.
 */
45926038178eSAndreas Gruenbacher static int drbd_do_features(struct drbd_tconn *tconn)
4593b411b363SPhilipp Reisner {
459465d11ed6SPhilipp Reisner 	/* ASSERT current == tconn->receiver ... */
4595e658983aSAndreas Gruenbacher 	struct p_connection_features *p;
4596e658983aSAndreas Gruenbacher 	const int expect = sizeof(struct p_connection_features);
459777351055SPhilipp Reisner 	struct packet_info pi;
4598a5c31904SAndreas Gruenbacher 	int err;
4599b411b363SPhilipp Reisner 
	/* Send our side first; any send/receive failure below is treated
	 * as a retryable error (return 0 = "try again"). */
46006038178eSAndreas Gruenbacher 	err = drbd_send_features(tconn);
4601e8d17b01SAndreas Gruenbacher 	if (err)
4602b411b363SPhilipp Reisner 		return 0;
4603b411b363SPhilipp Reisner 
460469bc7bc3SAndreas Gruenbacher 	err = drbd_recv_header(tconn, &pi);
460569bc7bc3SAndreas Gruenbacher 	if (err)
4606b411b363SPhilipp Reisner 		return 0;
4607b411b363SPhilipp Reisner 
	/* A wrong packet type or size means we talk a different dialect:
	 * give up permanently (return -1). */
46086038178eSAndreas Gruenbacher 	if (pi.cmd != P_CONNECTION_FEATURES) {
46096038178eSAndreas Gruenbacher 		conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
461077351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
4611b411b363SPhilipp Reisner 		return -1;
4612b411b363SPhilipp Reisner 	}
4613b411b363SPhilipp Reisner 
461477351055SPhilipp Reisner 	if (pi.size != expect) {
46156038178eSAndreas Gruenbacher 		conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
461677351055SPhilipp Reisner 		     expect, pi.size);
4617b411b363SPhilipp Reisner 		return -1;
4618b411b363SPhilipp Reisner 	}
4619b411b363SPhilipp Reisner 
4620e658983aSAndreas Gruenbacher 	p = pi.data;
4621e658983aSAndreas Gruenbacher 	err = drbd_recv_all_warn(tconn, p, expect);
4622a5c31904SAndreas Gruenbacher 	if (err)
4623b411b363SPhilipp Reisner 		return 0;
4624b411b363SPhilipp Reisner 
	/* Convert in place; from here on p holds host byte order. */
4625b411b363SPhilipp Reisner 	p->protocol_min = be32_to_cpu(p->protocol_min);
4626b411b363SPhilipp Reisner 	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* A zero protocol_max presumably comes from a peer that only fills
	 * in protocol_min; treat its range as that single version. */
4627b411b363SPhilipp Reisner 	if (p->protocol_max == 0)
4628b411b363SPhilipp Reisner 		p->protocol_max = p->protocol_min;
4629b411b363SPhilipp Reisner 
4630b411b363SPhilipp Reisner 	if (PRO_VERSION_MAX < p->protocol_min ||
4631b411b363SPhilipp Reisner 	    PRO_VERSION_MIN > p->protocol_max)
4632b411b363SPhilipp Reisner 		goto incompat;
4633b411b363SPhilipp Reisner 
	/* Agree on the highest version both sides support. */
463465d11ed6SPhilipp Reisner 	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
4635b411b363SPhilipp Reisner 
463665d11ed6SPhilipp Reisner 	conn_info(tconn, "Handshake successful: "
463765d11ed6SPhilipp Reisner 	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);
4638b411b363SPhilipp Reisner 
4639b411b363SPhilipp Reisner 	return 1;
4640b411b363SPhilipp Reisner 
4641b411b363SPhilipp Reisner  incompat:
464265d11ed6SPhilipp Reisner 	conn_err(tconn, "incompatible DRBD dialects: "
4643b411b363SPhilipp Reisner 	    "I support %d-%d, peer supports %d-%d\n",
4644b411b363SPhilipp Reisner 	    PRO_VERSION_MIN, PRO_VERSION_MAX,
4645b411b363SPhilipp Reisner 	    p->protocol_min, p->protocol_max);
4646b411b363SPhilipp Reisner 	return -1;
4647b411b363SPhilipp Reisner }
4648b411b363SPhilipp Reisner 
4649b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac-alg
 * cannot possibly work, so refuse authentication permanently
 * (-1: don't try again, go standalone).
 *
 * Fix: this function only has a tconn; the dev_err(DEV, ...) calls used
 * here previously reference an mdev that is not in scope (DEV is the
 * device-level logging macro).  Use conn_err(tconn, ...) like every
 * other connection-scope message in this file.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4656b411b363SPhilipp Reisner #else
4657b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
4658b10d96cbSJohannes Thoma 
4659b10d96cbSJohannes Thoma /* Return value:
4660b10d96cbSJohannes Thoma 	1 - auth succeeded,
4661b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
4662b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
4663b10d96cbSJohannes Thoma */
4664b10d96cbSJohannes Thoma 
/*
 * drbd_do_auth() - CRAM-HMAC peer authentication over the data socket.
 *
 * Flow: send our random challenge, receive the peer's challenge, HMAC it
 * with the shared secret and send the response, then receive the peer's
 * response to our challenge and compare it against the locally computed
 * expected value.  Return values are documented in the comment above.
 * All error paths funnel through the 'fail' label, which frees every
 * buffer (kfree(NULL) is a no-op).
 */
466513e6037dSPhilipp Reisner static int drbd_do_auth(struct drbd_tconn *tconn)
4666b411b363SPhilipp Reisner {
46679f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
4668b411b363SPhilipp Reisner 	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
4669b411b363SPhilipp Reisner 	struct scatterlist sg;
4670b411b363SPhilipp Reisner 	char *response = NULL;
4671b411b363SPhilipp Reisner 	char *right_response = NULL;
4672b411b363SPhilipp Reisner 	char *peers_ch = NULL;
467344ed167dSPhilipp Reisner 	unsigned int key_len;
467444ed167dSPhilipp Reisner 	char secret[SHARED_SECRET_MAX]; /* 64 byte */
4675b411b363SPhilipp Reisner 	unsigned int resp_size;
4676b411b363SPhilipp Reisner 	struct hash_desc desc;
467777351055SPhilipp Reisner 	struct packet_info pi;
467844ed167dSPhilipp Reisner 	struct net_conf *nc;
467969bc7bc3SAndreas Gruenbacher 	int err, rv;
4680b411b363SPhilipp Reisner 
46819f5bdc33SAndreas Gruenbacher 	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */
46829f5bdc33SAndreas Gruenbacher 
	/* Copy the shared secret out under RCU; net_conf may be replaced
	 * concurrently by a configuration change. */
468344ed167dSPhilipp Reisner 	rcu_read_lock();
468444ed167dSPhilipp Reisner 	nc = rcu_dereference(tconn->net_conf);
468544ed167dSPhilipp Reisner 	key_len = strlen(nc->shared_secret);
468644ed167dSPhilipp Reisner 	memcpy(secret, nc->shared_secret, key_len);
468744ed167dSPhilipp Reisner 	rcu_read_unlock();
468844ed167dSPhilipp Reisner 
468913e6037dSPhilipp Reisner 	desc.tfm = tconn->cram_hmac_tfm;
4690b411b363SPhilipp Reisner 	desc.flags = 0;
4691b411b363SPhilipp Reisner 
469244ed167dSPhilipp Reisner 	rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
4693b411b363SPhilipp Reisner 	if (rv) {
469413e6037dSPhilipp Reisner 		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
4695b10d96cbSJohannes Thoma 		rv = -1;
4696b411b363SPhilipp Reisner 		goto fail;
4697b411b363SPhilipp Reisner 	}
4698b411b363SPhilipp Reisner 
4699b411b363SPhilipp Reisner 	get_random_bytes(my_challenge, CHALLENGE_LEN);
4700b411b363SPhilipp Reisner 
	/* Step 1: send our challenge. */
47019f5bdc33SAndreas Gruenbacher 	sock = &tconn->data;
47029f5bdc33SAndreas Gruenbacher 	if (!conn_prepare_command(tconn, sock)) {
47039f5bdc33SAndreas Gruenbacher 		rv = 0;
47049f5bdc33SAndreas Gruenbacher 		goto fail;
47059f5bdc33SAndreas Gruenbacher 	}
4706e658983aSAndreas Gruenbacher 	rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
47079f5bdc33SAndreas Gruenbacher 				my_challenge, CHALLENGE_LEN);
4708b411b363SPhilipp Reisner 	if (!rv)
4709b411b363SPhilipp Reisner 		goto fail;
4710b411b363SPhilipp Reisner 
	/* Step 2: receive the peer's challenge. */
471169bc7bc3SAndreas Gruenbacher 	err = drbd_recv_header(tconn, &pi);
471269bc7bc3SAndreas Gruenbacher 	if (err) {
4713b411b363SPhilipp Reisner 		rv = 0;
4714b411b363SPhilipp Reisner 		goto fail;
4715b411b363SPhilipp Reisner 	}
4716b411b363SPhilipp Reisner 
471777351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_CHALLENGE) {
471813e6037dSPhilipp Reisner 		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
471977351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
4720b411b363SPhilipp Reisner 		rv = 0;
4721b411b363SPhilipp Reisner 		goto fail;
4722b411b363SPhilipp Reisner 	}
4723b411b363SPhilipp Reisner 
	/* Bound the peer-controlled allocation size below. */
472477351055SPhilipp Reisner 	if (pi.size > CHALLENGE_LEN * 2) {
472513e6037dSPhilipp Reisner 		conn_err(tconn, "expected AuthChallenge payload too big.\n");
4726b10d96cbSJohannes Thoma 		rv = -1;
4727b411b363SPhilipp Reisner 		goto fail;
4728b411b363SPhilipp Reisner 	}
4729b411b363SPhilipp Reisner 
473077351055SPhilipp Reisner 	peers_ch = kmalloc(pi.size, GFP_NOIO);
4731b411b363SPhilipp Reisner 	if (peers_ch == NULL) {
473213e6037dSPhilipp Reisner 		conn_err(tconn, "kmalloc of peers_ch failed\n");
4733b10d96cbSJohannes Thoma 		rv = -1;
4734b411b363SPhilipp Reisner 		goto fail;
4735b411b363SPhilipp Reisner 	}
4736b411b363SPhilipp Reisner 
4737a5c31904SAndreas Gruenbacher 	err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4738a5c31904SAndreas Gruenbacher 	if (err) {
4739b411b363SPhilipp Reisner 		rv = 0;
4740b411b363SPhilipp Reisner 		goto fail;
4741b411b363SPhilipp Reisner 	}
4742b411b363SPhilipp Reisner 
	/* Step 3: HMAC the peer's challenge and send our response. */
474313e6037dSPhilipp Reisner 	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
4744b411b363SPhilipp Reisner 	response = kmalloc(resp_size, GFP_NOIO);
4745b411b363SPhilipp Reisner 	if (response == NULL) {
474613e6037dSPhilipp Reisner 		conn_err(tconn, "kmalloc of response failed\n");
4747b10d96cbSJohannes Thoma 		rv = -1;
4748b411b363SPhilipp Reisner 		goto fail;
4749b411b363SPhilipp Reisner 	}
4750b411b363SPhilipp Reisner 
4751b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
475277351055SPhilipp Reisner 	sg_set_buf(&sg, peers_ch, pi.size);
4753b411b363SPhilipp Reisner 
4754b411b363SPhilipp Reisner 	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4755b411b363SPhilipp Reisner 	if (rv) {
475613e6037dSPhilipp Reisner 		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
4757b10d96cbSJohannes Thoma 		rv = -1;
4758b411b363SPhilipp Reisner 		goto fail;
4759b411b363SPhilipp Reisner 	}
4760b411b363SPhilipp Reisner 
47619f5bdc33SAndreas Gruenbacher 	if (!conn_prepare_command(tconn, sock)) {
47629f5bdc33SAndreas Gruenbacher 		rv = 0;
47639f5bdc33SAndreas Gruenbacher 		goto fail;
47649f5bdc33SAndreas Gruenbacher 	}
4765e658983aSAndreas Gruenbacher 	rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
47669f5bdc33SAndreas Gruenbacher 				response, resp_size);
4767b411b363SPhilipp Reisner 	if (!rv)
4768b411b363SPhilipp Reisner 		goto fail;
4769b411b363SPhilipp Reisner 
	/* Step 4: receive the peer's response to our challenge. */
477069bc7bc3SAndreas Gruenbacher 	err = drbd_recv_header(tconn, &pi);
477169bc7bc3SAndreas Gruenbacher 	if (err) {
4772b411b363SPhilipp Reisner 		rv = 0;
4773b411b363SPhilipp Reisner 		goto fail;
4774b411b363SPhilipp Reisner 	}
4775b411b363SPhilipp Reisner 
477677351055SPhilipp Reisner 	if (pi.cmd != P_AUTH_RESPONSE) {
477713e6037dSPhilipp Reisner 		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
477877351055SPhilipp Reisner 			 cmdname(pi.cmd), pi.cmd);
4779b411b363SPhilipp Reisner 		rv = 0;
4780b411b363SPhilipp Reisner 		goto fail;
4781b411b363SPhilipp Reisner 	}
4782b411b363SPhilipp Reisner 
478377351055SPhilipp Reisner 	if (pi.size != resp_size) {
478413e6037dSPhilipp Reisner 		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
4785b411b363SPhilipp Reisner 		rv = 0;
4786b411b363SPhilipp Reisner 		goto fail;
4787b411b363SPhilipp Reisner 	}
4788b411b363SPhilipp Reisner 
4789a5c31904SAndreas Gruenbacher 	err = drbd_recv_all_warn(tconn, response , resp_size);
4790a5c31904SAndreas Gruenbacher 	if (err) {
4791b411b363SPhilipp Reisner 		rv = 0;
4792b411b363SPhilipp Reisner 		goto fail;
4793b411b363SPhilipp Reisner 	}
4794b411b363SPhilipp Reisner 
	/* Step 5: compute what the response should be and compare. */
4795b411b363SPhilipp Reisner 	right_response = kmalloc(resp_size, GFP_NOIO);
47962d1ee87dSJulia Lawall 	if (right_response == NULL) {
479713e6037dSPhilipp Reisner 		conn_err(tconn, "kmalloc of right_response failed\n");
4798b10d96cbSJohannes Thoma 		rv = -1;
4799b411b363SPhilipp Reisner 		goto fail;
4800b411b363SPhilipp Reisner 	}
4801b411b363SPhilipp Reisner 
4802b411b363SPhilipp Reisner 	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4803b411b363SPhilipp Reisner 
4804b411b363SPhilipp Reisner 	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4805b411b363SPhilipp Reisner 	if (rv) {
480613e6037dSPhilipp Reisner 		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
4807b10d96cbSJohannes Thoma 		rv = -1;
4808b411b363SPhilipp Reisner 		goto fail;
4809b411b363SPhilipp Reisner 	}
4810b411b363SPhilipp Reisner 
	/* NOTE(review): memcmp is not constant-time; a timing-safe compare
	 * (e.g. crypto_memneq) would be preferable here — verify whether
	 * the threat model cares about timing side channels. */
4811b411b363SPhilipp Reisner 	rv = !memcmp(response, right_response, resp_size);
4812b411b363SPhilipp Reisner 
4813b411b363SPhilipp Reisner 	if (rv)
481444ed167dSPhilipp Reisner 		conn_info(tconn, "Peer authenticated using %d bytes HMAC\n",
481544ed167dSPhilipp Reisner 		     resp_size);
4816b10d96cbSJohannes Thoma 	else
4817b10d96cbSJohannes Thoma 		rv = -1;
4818b411b363SPhilipp Reisner 
4819b411b363SPhilipp Reisner  fail:
4820b411b363SPhilipp Reisner 	kfree(peers_ch);
4821b411b363SPhilipp Reisner 	kfree(response);
4822b411b363SPhilipp Reisner 	kfree(right_response);
4823b411b363SPhilipp Reisner 
4824b411b363SPhilipp Reisner 	return rv;
4825b411b363SPhilipp Reisner }
4826b411b363SPhilipp Reisner #endif
4827b411b363SPhilipp Reisner 
4828b411b363SPhilipp Reisner int drbdd_init(struct drbd_thread *thi)
4829b411b363SPhilipp Reisner {
4830392c8801SPhilipp Reisner 	struct drbd_tconn *tconn = thi->tconn;
4831b411b363SPhilipp Reisner 	int h;
4832b411b363SPhilipp Reisner 
48334d641dd7SPhilipp Reisner 	conn_info(tconn, "receiver (re)started\n");
4834b411b363SPhilipp Reisner 
4835b411b363SPhilipp Reisner 	do {
483681fa2e67SPhilipp Reisner 		h = conn_connect(tconn);
4837b411b363SPhilipp Reisner 		if (h == 0) {
483881fa2e67SPhilipp Reisner 			conn_disconnect(tconn);
483920ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
4840b411b363SPhilipp Reisner 		}
4841b411b363SPhilipp Reisner 		if (h == -1) {
48424d641dd7SPhilipp Reisner 			conn_warn(tconn, "Discarding network configuration.\n");
4843bbeb641cSPhilipp Reisner 			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
4844b411b363SPhilipp Reisner 		}
4845b411b363SPhilipp Reisner 	} while (h == 0);
4846b411b363SPhilipp Reisner 
484791fd4dadSPhilipp Reisner 	if (h > 0)
48484d641dd7SPhilipp Reisner 		drbdd(tconn);
4849b411b363SPhilipp Reisner 
485081fa2e67SPhilipp Reisner 	conn_disconnect(tconn);
4851b411b363SPhilipp Reisner 
48524d641dd7SPhilipp Reisner 	conn_info(tconn, "receiver terminated\n");
4853b411b363SPhilipp Reisner 	return 0;
4854b411b363SPhilipp Reisner }
4855b411b363SPhilipp Reisner 
4856b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
4857b411b363SPhilipp Reisner 
4858e05e1e59SAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
4859b411b363SPhilipp Reisner {
4860e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
4861b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
4862b411b363SPhilipp Reisner 
4863b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
4864fc3b10a4SPhilipp Reisner 		set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4865b411b363SPhilipp Reisner 	} else {
4866fc3b10a4SPhilipp Reisner 		set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4867fc3b10a4SPhilipp Reisner 		conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4868fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
4869fc3b10a4SPhilipp Reisner 	}
4870fc3b10a4SPhilipp Reisner 	wake_up(&tconn->ping_wait);
4871e4f78edeSPhilipp Reisner 
48722735a594SAndreas Gruenbacher 	return 0;
4873fc3b10a4SPhilipp Reisner }
4874e4f78edeSPhilipp Reisner 
48751952e916SAndreas Gruenbacher static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
4876e4f78edeSPhilipp Reisner {
48771952e916SAndreas Gruenbacher 	struct drbd_conf *mdev;
4878e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
4879e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
4880e4f78edeSPhilipp Reisner 
48811952e916SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
48821952e916SAndreas Gruenbacher 	if (!mdev)
48832735a594SAndreas Gruenbacher 		return -EIO;
48841952e916SAndreas Gruenbacher 
48854d0fc3fdSPhilipp Reisner 	if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
48864d0fc3fdSPhilipp Reisner 		D_ASSERT(tconn->agreed_pro_version < 100);
48874d0fc3fdSPhilipp Reisner 		return got_conn_RqSReply(tconn, pi);
48884d0fc3fdSPhilipp Reisner 	}
48894d0fc3fdSPhilipp Reisner 
4890e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
4891e4f78edeSPhilipp Reisner 		set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4892e4f78edeSPhilipp Reisner 	} else {
4893e4f78edeSPhilipp Reisner 		set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4894b411b363SPhilipp Reisner 		dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4895b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
4896b411b363SPhilipp Reisner 	}
4897b411b363SPhilipp Reisner 	wake_up(&mdev->state_wait);
4898b411b363SPhilipp Reisner 
48992735a594SAndreas Gruenbacher 	return 0;
4900b411b363SPhilipp Reisner }
4901b411b363SPhilipp Reisner 
/* P_PING: simply answer with a ping-ack; a send error propagates up. */
static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
{
	int err = drbd_send_ping_ack(tconn);

	return err;
}
4907b411b363SPhilipp Reisner 
4908e05e1e59SAndreas Gruenbacher static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
4909b411b363SPhilipp Reisner {
4910b411b363SPhilipp Reisner 	/* restore idle timeout */
49112a67d8b9SPhilipp Reisner 	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
49122a67d8b9SPhilipp Reisner 	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
49132a67d8b9SPhilipp Reisner 		wake_up(&tconn->ping_wait);
4914b411b363SPhilipp Reisner 
49152735a594SAndreas Gruenbacher 	return 0;
4916b411b363SPhilipp Reisner }
4917b411b363SPhilipp Reisner 
/*
 * got_IsInSync() - the peer confirmed a block is already in sync
 * (checksum-based resync, protocol >= 89).  Mark the block in sync in
 * the bitmap and update the resync accounting.
 */
49181952e916SAndreas Gruenbacher static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
4919b411b363SPhilipp Reisner {
49201952e916SAndreas Gruenbacher 	struct drbd_conf *mdev;
4921e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
4922b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
4923b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
4924b411b363SPhilipp Reisner 
49251952e916SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
49261952e916SAndreas Gruenbacher 	if (!mdev)
49272735a594SAndreas Gruenbacher 		return -EIO;
49281952e916SAndreas Gruenbacher 
492931890f4aSPhilipp Reisner 	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
4930b411b363SPhilipp Reisner 
4931b411b363SPhilipp Reisner 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4932b411b363SPhilipp Reisner 
	/* Only touch resync/bitmap state while holding a local disk ref. */
49331d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
4934b411b363SPhilipp Reisner 		drbd_rs_complete_io(mdev, sector);
4935b411b363SPhilipp Reisner 		drbd_set_in_sync(mdev, sector, blksize);
4936b411b363SPhilipp Reisner 		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4937b411b363SPhilipp Reisner 		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
49381d53f09eSLars Ellenberg 		put_ldev(mdev);
49391d53f09eSLars Ellenberg 	}
4940b411b363SPhilipp Reisner 	dec_rs_pending(mdev);
	/* account incoming resync progress in 512-byte sectors */
4941778f271dSPhilipp Reisner 	atomic_add(blksize >> 9, &mdev->rs_sect_in);
4942b411b363SPhilipp Reisner 
49432735a594SAndreas Gruenbacher 	return 0;
4944b411b363SPhilipp Reisner }
4945b411b363SPhilipp Reisner 
/*
 * validate_req_change_req_state() - look up a request by (id, sector) in
 * the given tree and feed the state-machine event 'what' to it, all
 * under req_lock.  missing_ok is forwarded to find_request();
 * presumably it suppresses complaints when absence is legal — either
 * way a failed lookup returns -EIO to the caller.
 */
4946bc9c5c41SAndreas Gruenbacher static int
4947bc9c5c41SAndreas Gruenbacher validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4948bc9c5c41SAndreas Gruenbacher 			      struct rb_root *root, const char *func,
4949bc9c5c41SAndreas Gruenbacher 			      enum drbd_req_event what, bool missing_ok)
4950b411b363SPhilipp Reisner {
4951b411b363SPhilipp Reisner 	struct drbd_request *req;
4952b411b363SPhilipp Reisner 	struct bio_and_error m;
4953b411b363SPhilipp Reisner 
495487eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
4955bc9c5c41SAndreas Gruenbacher 	req = find_request(mdev, root, id, sector, missing_ok, func);
4956b411b363SPhilipp Reisner 	if (unlikely(!req)) {
495787eeee41SPhilipp Reisner 		spin_unlock_irq(&mdev->tconn->req_lock);
495885997675SAndreas Gruenbacher 		return -EIO;
4959b411b363SPhilipp Reisner 	}
4960b411b363SPhilipp Reisner 	__req_mod(req, what, &m);
496187eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
4962b411b363SPhilipp Reisner 
	/* Complete the master bio outside of the spinlock. */
4963b411b363SPhilipp Reisner 	if (m.bio)
4964b411b363SPhilipp Reisner 		complete_master_bio(mdev, &m);
496585997675SAndreas Gruenbacher 	return 0;
4966b411b363SPhilipp Reisner }
4967b411b363SPhilipp Reisner 
/*
 * got_BlockAck() - handle the write-acknowledgement packet family
 * (P_WRITE_ACK, P_RECV_ACK, P_RS_WRITE_ACK, P_SUPERSEDED,
 * P_RETRY_WRITE) by translating the packet type into the matching
 * request state-machine event.
 */
49681952e916SAndreas Gruenbacher static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
4969b411b363SPhilipp Reisner {
49701952e916SAndreas Gruenbacher 	struct drbd_conf *mdev;
4971e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
4972b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
4973b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
4974b411b363SPhilipp Reisner 	enum drbd_req_event what;
4975b411b363SPhilipp Reisner 
49761952e916SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
49771952e916SAndreas Gruenbacher 	if (!mdev)
49782735a594SAndreas Gruenbacher 		return -EIO;
49791952e916SAndreas Gruenbacher 
4980b411b363SPhilipp Reisner 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4981b411b363SPhilipp Reisner 
	/* ID_SYNCER marks an ack for a resync write: there is no
	 * drbd_request object; just update bitmap and resync counters. */
4982579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
4983b411b363SPhilipp Reisner 		drbd_set_in_sync(mdev, sector, blksize);
4984b411b363SPhilipp Reisner 		dec_rs_pending(mdev);
49852735a594SAndreas Gruenbacher 		return 0;
4986b411b363SPhilipp Reisner 	}
	/* Map the ack packet type to the request state-machine event. */
4987e05e1e59SAndreas Gruenbacher 	switch (pi->cmd) {
4988b411b363SPhilipp Reisner 	case P_RS_WRITE_ACK:
49898554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER_AND_SIS;
4990b411b363SPhilipp Reisner 		break;
4991b411b363SPhilipp Reisner 	case P_WRITE_ACK:
49928554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER;
4993b411b363SPhilipp Reisner 		break;
4994b411b363SPhilipp Reisner 	case P_RECV_ACK:
49958554df1cSAndreas Gruenbacher 		what = RECV_ACKED_BY_PEER;
4996b411b363SPhilipp Reisner 		break;
4997d4dabbe2SLars Ellenberg 	case P_SUPERSEDED:
4998d4dabbe2SLars Ellenberg 		what = CONFLICT_RESOLVED;
49997be8da07SAndreas Gruenbacher 		break;
50007be8da07SAndreas Gruenbacher 	case P_RETRY_WRITE:
50017be8da07SAndreas Gruenbacher 		what = POSTPONE_WRITE;
5002b411b363SPhilipp Reisner 		break;
5003b411b363SPhilipp Reisner 	default:
		/* the dispatch table routes only the five packets above here */
50042735a594SAndreas Gruenbacher 		BUG();
5005b411b363SPhilipp Reisner 	}
5006b411b363SPhilipp Reisner 
5007b411b363SPhilipp Reisner 	return validate_req_change_req_state(mdev, p->block_id, sector,
5008bc9c5c41SAndreas Gruenbacher 					     &mdev->write_requests, __func__,
5009bc9c5c41SAndreas Gruenbacher 					     what, false);
5010b411b363SPhilipp Reisner }
5011b411b363SPhilipp Reisner 
/*
 * got_NegAck() - the peer failed to apply a write (negative ack).
 * For resync writes (ID_SYNCER) record the failed I/O; for application
 * writes apply NEG_ACKED to the request and, if it is already gone,
 * mark the range out of sync (see the comments below).
 */
50121952e916SAndreas Gruenbacher static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
5013b411b363SPhilipp Reisner {
50141952e916SAndreas Gruenbacher 	struct drbd_conf *mdev;
5015e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5016b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
50172deb8336SPhilipp Reisner 	int size = be32_to_cpu(p->blksize);
501885997675SAndreas Gruenbacher 	int err;
5019b411b363SPhilipp Reisner 
50201952e916SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
50211952e916SAndreas Gruenbacher 	if (!mdev)
50222735a594SAndreas Gruenbacher 		return -EIO;
5023b411b363SPhilipp Reisner 
5024b411b363SPhilipp Reisner 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5025b411b363SPhilipp Reisner 
5026579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5027b411b363SPhilipp Reisner 		dec_rs_pending(mdev);
5028b411b363SPhilipp Reisner 		drbd_rs_failed_io(mdev, sector, size);
50292735a594SAndreas Gruenbacher 		return 0;
5030b411b363SPhilipp Reisner 	}
50312deb8336SPhilipp Reisner 
	/* missing_ok = true: the request may legitimately be gone already
	 * (see the protocol A/B comments below). */
503285997675SAndreas Gruenbacher 	err = validate_req_change_req_state(mdev, p->block_id, sector,
5033bc9c5c41SAndreas Gruenbacher 					    &mdev->write_requests, __func__,
5034303d1448SPhilipp Reisner 					    NEG_ACKED, true);
503585997675SAndreas Gruenbacher 	if (err) {
50362deb8336SPhilipp Reisner 		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
50372deb8336SPhilipp Reisner 		   The master bio might already be completed, therefore the
5038c3afd8f5SAndreas Gruenbacher 		   request is no longer in the collision hash. */
50392deb8336SPhilipp Reisner 		/* In Protocol B we might already have got a P_RECV_ACK
50402deb8336SPhilipp Reisner 		   but then get a P_NEG_ACK afterwards. */
50412deb8336SPhilipp Reisner 		drbd_set_out_of_sync(mdev, sector, size);
50422deb8336SPhilipp Reisner 	}
50432735a594SAndreas Gruenbacher 	return 0;
5044b411b363SPhilipp Reisner }
5045b411b363SPhilipp Reisner 
50461952e916SAndreas Gruenbacher static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
5047b411b363SPhilipp Reisner {
50481952e916SAndreas Gruenbacher 	struct drbd_conf *mdev;
5049e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5050b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5051b411b363SPhilipp Reisner 
50521952e916SAndreas Gruenbacher 	mdev = vnr_to_mdev(tconn, pi->vnr);
50531952e916SAndreas Gruenbacher 	if (!mdev)
50542735a594SAndreas Gruenbacher 		return -EIO;
50551952e916SAndreas Gruenbacher 
5056b411b363SPhilipp Reisner 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
50577be8da07SAndreas Gruenbacher 
5058380207d0SPhilipp Reisner 	dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
5059b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5060b411b363SPhilipp Reisner 
5061b411b363SPhilipp Reisner 	return validate_req_change_req_state(mdev, p->block_id, sector,
5062bc9c5c41SAndreas Gruenbacher 					     &mdev->read_requests, __func__,
50638554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5064b411b363SPhilipp Reisner }
5065b411b363SPhilipp Reisner 
/* Handle P_NEG_RS_DREPLY / P_RS_CANCEL: the peer could not (or will not)
 * service a resync read request.  Both variants complete the pending
 * resync I/O; only P_NEG_RS_DREPLY additionally accounts the block as
 * failed resync I/O.
 *
 * Returns 0 on success, -EIO if pi->vnr does not map to a volume.
 */
static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	/* Only touch resync bookkeeping while we can pin the local disk. */
	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(mdev);
	}

	return 0;
}
5099b411b363SPhilipp Reisner 
/* Handle P_BARRIER_ACK: the peer confirmed a whole write epoch.
 * Releases the corresponding requests from the transfer log, then, for
 * every volume that is C_AHEAD with no application I/O in flight, arms
 * start_resync_timer (one HZ from now) to return to resync — guarded by
 * AHEAD_TO_SYNC_SOURCE so the timer is armed at most once.
 */
static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_conf *mdev;
	int vnr;

	tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		if (mdev->state.conn == C_AHEAD &&
		    atomic_read(&mdev->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
			mdev->start_resync_timer.expires = jiffies + HZ;
			add_timer(&mdev->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5121b411b363SPhilipp Reisner 
/* Handle P_OV_RESULT: the peer reports the verify outcome for one block
 * of an online-verify run.  Records out-of-sync blocks, advances the
 * progress marks, and queues w_ov_finished once the last block is in.
 *
 * Returns 0 (also when the local disk is gone), -EIO if pi->vnr does
 * not map to a volume.
 */
static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev;
	struct p_block_ack *p = pi->data;
	struct drbd_work *w;
	sector_t sector;
	int size;

	mdev = vnr_to_mdev(tconn, pi->vnr);
	if (!mdev)
		return -EIO;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* NOTE(review): the else branch presumably flushes/logs the run of
	 * out-of-sync blocks accumulated so far — confirm against
	 * ov_out_of_sync_print()'s definition. */
	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(mdev, sector, size);
	else
		ov_out_of_sync_print(mdev);

	/* Without a local disk there is no resync bookkeeping to update. */
	if (!get_ldev(mdev))
		return 0;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		/* Verify finished: hand the completion to the sender work
		 * queue; on allocation failure fall back to finishing
		 * synchronously right here. */
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work(&mdev->tconn->sender_work, w);
		} else {
			dev_err(DEV, "kmalloc(w) failed.");
			ov_out_of_sync_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return 0;
}
5171b411b363SPhilipp Reisner 
/* Intentional no-op handler: the packet's payload has already been read
 * from the socket by the asender loop and is simply discarded
 * (used for P_DELAY_PROBE in asender_tbl[]). */
static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
{
	return 0;
}
51760ced55a3SPhilipp Reisner 
/* Drain the done_ee lists of all volumes on this connection.
 *
 * Repeats until a locked re-check finds every volume's done_ee list
 * empty.  Each mdev is pinned with a kref before the RCU read lock is
 * dropped — presumably because drbd_finish_peer_reqs() can block
 * (TODO confirm) — and the iteration resumes under RCU afterwards.
 *
 * Returns 1 as soon as drbd_finish_peer_reqs() fails for any volume,
 * 0 once all lists are empty.
 */
static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr, not_empty = 0;

	do {
		/* Consume any pending asender wakeup/signal before working. */
		clear_bit(SIGNAL_ASENDER, &tconn->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
			kref_get(&mdev->kref);	/* keep mdev valid outside RCU */
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(mdev)) {
				kref_put(&mdev->kref, &drbd_minor_destroy);
				return 1;
			}
			kref_put(&mdev->kref, &drbd_minor_destroy);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &tconn->flags);

		/* Under req_lock, check whether more peer requests completed
		 * while we were busy; if so, loop again. */
		spin_lock_irq(&tconn->req_lock);
		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
			not_empty = !list_empty(&mdev->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&tconn->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}
5211b411b363SPhilipp Reisner 
/* One entry of the asender dispatch table: the payload size to expect
 * (header excluded) and the handler invoked once a complete packet has
 * been received on the meta socket. */
struct asender_cmd {
	size_t pkt_size;	/* payload size in bytes, without the header */
	int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
};
5216b411b363SPhilipp Reisner 
/* Dispatch table for meta-socket packets, indexed by packet command.
 * Commands without an entry have fn == NULL and are rejected by
 * drbd_asender().  Several commands deliberately share a handler
 * (e.g. the *_ACK family all route to got_BlockAck). */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
5236b411b363SPhilipp Reisner 
5237b411b363SPhilipp Reisner int drbd_asender(struct drbd_thread *thi)
5238b411b363SPhilipp Reisner {
5239392c8801SPhilipp Reisner 	struct drbd_tconn *tconn = thi->tconn;
5240b411b363SPhilipp Reisner 	struct asender_cmd *cmd = NULL;
524177351055SPhilipp Reisner 	struct packet_info pi;
5242257d0af6SPhilipp Reisner 	int rv;
5243e658983aSAndreas Gruenbacher 	void *buf    = tconn->meta.rbuf;
5244b411b363SPhilipp Reisner 	int received = 0;
524552b061a4SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(tconn);
524652b061a4SAndreas Gruenbacher 	int expect   = header_size;
524744ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
524844ed167dSPhilipp Reisner 	struct net_conf *nc;
5249bb77d34eSAndreas Gruenbacher 	int ping_timeo, tcp_cork, ping_int;
5250b411b363SPhilipp Reisner 
5251b411b363SPhilipp Reisner 	current->policy = SCHED_RR;  /* Make this a realtime task! */
5252b411b363SPhilipp Reisner 	current->rt_priority = 2;    /* more important than all other tasks */
5253b411b363SPhilipp Reisner 
5254e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
525580822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
525644ed167dSPhilipp Reisner 
525744ed167dSPhilipp Reisner 		rcu_read_lock();
525844ed167dSPhilipp Reisner 		nc = rcu_dereference(tconn->net_conf);
525944ed167dSPhilipp Reisner 		ping_timeo = nc->ping_timeo;
5260bb77d34eSAndreas Gruenbacher 		tcp_cork = nc->tcp_cork;
526144ed167dSPhilipp Reisner 		ping_int = nc->ping_int;
526244ed167dSPhilipp Reisner 		rcu_read_unlock();
526344ed167dSPhilipp Reisner 
526432862ec7SPhilipp Reisner 		if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
5265a17647aaSAndreas Gruenbacher 			if (drbd_send_ping(tconn)) {
526632862ec7SPhilipp Reisner 				conn_err(tconn, "drbd_send_ping has failed\n");
5267841ce241SAndreas Gruenbacher 				goto reconnect;
5268841ce241SAndreas Gruenbacher 			}
526944ed167dSPhilipp Reisner 			tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
527044ed167dSPhilipp Reisner 			ping_timeout_active = true;
5271b411b363SPhilipp Reisner 		}
5272b411b363SPhilipp Reisner 
527332862ec7SPhilipp Reisner 		/* TODO: conditionally cork; it may hurt latency if we cork without
527432862ec7SPhilipp Reisner 		   much to send */
5275bb77d34eSAndreas Gruenbacher 		if (tcp_cork)
527632862ec7SPhilipp Reisner 			drbd_tcp_cork(tconn->meta.socket);
5277a990be46SAndreas Gruenbacher 		if (tconn_finish_peer_reqs(tconn)) {
5278a990be46SAndreas Gruenbacher 			conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
5279b411b363SPhilipp Reisner 			goto reconnect;
5280b411b363SPhilipp Reisner 		}
5281b411b363SPhilipp Reisner 		/* but unconditionally uncork unless disabled */
5282bb77d34eSAndreas Gruenbacher 		if (tcp_cork)
528332862ec7SPhilipp Reisner 			drbd_tcp_uncork(tconn->meta.socket);
5284b411b363SPhilipp Reisner 
5285b411b363SPhilipp Reisner 		/* short circuit, recv_msg would return EINTR anyways. */
5286b411b363SPhilipp Reisner 		if (signal_pending(current))
5287b411b363SPhilipp Reisner 			continue;
5288b411b363SPhilipp Reisner 
528932862ec7SPhilipp Reisner 		rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
529032862ec7SPhilipp Reisner 		clear_bit(SIGNAL_ASENDER, &tconn->flags);
5291b411b363SPhilipp Reisner 
5292b411b363SPhilipp Reisner 		flush_signals(current);
5293b411b363SPhilipp Reisner 
5294b411b363SPhilipp Reisner 		/* Note:
5295b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
5296b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
5297b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
5298b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
5299b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
5300b411b363SPhilipp Reisner 		 * rv == expected: full header or command
5301b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
5302b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
5303b411b363SPhilipp Reisner 		 */
5304b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
5305b411b363SPhilipp Reisner 			received += rv;
5306b411b363SPhilipp Reisner 			buf	 += rv;
5307b411b363SPhilipp Reisner 		} else if (rv == 0) {
5308b66623e3SPhilipp Reisner 			if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
5309b66623e3SPhilipp Reisner 				long t;
5310b66623e3SPhilipp Reisner 				rcu_read_lock();
5311b66623e3SPhilipp Reisner 				t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
5312b66623e3SPhilipp Reisner 				rcu_read_unlock();
5313b66623e3SPhilipp Reisner 
5314b66623e3SPhilipp Reisner 				t = wait_event_timeout(tconn->ping_wait,
5315b66623e3SPhilipp Reisner 						       tconn->cstate < C_WF_REPORT_PARAMS,
5316b66623e3SPhilipp Reisner 						       t);
5317599377acSPhilipp Reisner 				if (t)
5318599377acSPhilipp Reisner 					break;
5319599377acSPhilipp Reisner 			}
532032862ec7SPhilipp Reisner 			conn_err(tconn, "meta connection shut down by peer.\n");
5321b411b363SPhilipp Reisner 			goto reconnect;
5322b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
5323cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
5324cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
532532862ec7SPhilipp Reisner 			if (time_after(tconn->last_received,
532632862ec7SPhilipp Reisner 				jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
5327cb6518cbSLars Ellenberg 				continue;
5328f36af18cSLars Ellenberg 			if (ping_timeout_active) {
532932862ec7SPhilipp Reisner 				conn_err(tconn, "PingAck did not arrive in time.\n");
5330b411b363SPhilipp Reisner 				goto reconnect;
5331b411b363SPhilipp Reisner 			}
533232862ec7SPhilipp Reisner 			set_bit(SEND_PING, &tconn->flags);
5333b411b363SPhilipp Reisner 			continue;
5334b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
5335b411b363SPhilipp Reisner 			continue;
5336b411b363SPhilipp Reisner 		} else {
533732862ec7SPhilipp Reisner 			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
5338b411b363SPhilipp Reisner 			goto reconnect;
5339b411b363SPhilipp Reisner 		}
5340b411b363SPhilipp Reisner 
5341b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
5342e658983aSAndreas Gruenbacher 			if (decode_header(tconn, tconn->meta.rbuf, &pi))
5343b411b363SPhilipp Reisner 				goto reconnect;
53447201b972SAndreas Gruenbacher 			cmd = &asender_tbl[pi.cmd];
53451952e916SAndreas Gruenbacher 			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
53462fcb8f30SAndreas Gruenbacher 				conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
53472fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
5348b411b363SPhilipp Reisner 				goto disconnect;
5349b411b363SPhilipp Reisner 			}
5350e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
535152b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
535232862ec7SPhilipp Reisner 				conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
535377351055SPhilipp Reisner 					pi.cmd, pi.size);
5354b411b363SPhilipp Reisner 				goto reconnect;
5355b411b363SPhilipp Reisner 			}
5356257d0af6SPhilipp Reisner 		}
5357b411b363SPhilipp Reisner 		if (received == expect) {
53582735a594SAndreas Gruenbacher 			bool err;
5359a4fbda8eSPhilipp Reisner 
53602735a594SAndreas Gruenbacher 			err = cmd->fn(tconn, &pi);
53612735a594SAndreas Gruenbacher 			if (err) {
53621952e916SAndreas Gruenbacher 				conn_err(tconn, "%pf failed\n", cmd->fn);
5363b411b363SPhilipp Reisner 				goto reconnect;
53641952e916SAndreas Gruenbacher 			}
5365b411b363SPhilipp Reisner 
5366a4fbda8eSPhilipp Reisner 			tconn->last_received = jiffies;
5367f36af18cSLars Ellenberg 
536844ed167dSPhilipp Reisner 			if (cmd == &asender_tbl[P_PING_ACK]) {
536944ed167dSPhilipp Reisner 				/* restore idle timeout */
537044ed167dSPhilipp Reisner 				tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
537144ed167dSPhilipp Reisner 				ping_timeout_active = false;
537244ed167dSPhilipp Reisner 			}
5373b411b363SPhilipp Reisner 
5374e658983aSAndreas Gruenbacher 			buf	 = tconn->meta.rbuf;
5375b411b363SPhilipp Reisner 			received = 0;
537652b061a4SAndreas Gruenbacher 			expect	 = header_size;
5377b411b363SPhilipp Reisner 			cmd	 = NULL;
5378b411b363SPhilipp Reisner 		}
5379b411b363SPhilipp Reisner 	}
5380b411b363SPhilipp Reisner 
5381b411b363SPhilipp Reisner 	if (0) {
5382b411b363SPhilipp Reisner reconnect:
5383bbeb641cSPhilipp Reisner 		conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
538419fffd7bSPhilipp Reisner 		conn_md_sync(tconn);
5385b411b363SPhilipp Reisner 	}
5386b411b363SPhilipp Reisner 	if (0) {
5387b411b363SPhilipp Reisner disconnect:
5388bbeb641cSPhilipp Reisner 		conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
5389b411b363SPhilipp Reisner 	}
539032862ec7SPhilipp Reisner 	clear_bit(SIGNAL_ASENDER, &tconn->flags);
5391b411b363SPhilipp Reisner 
539232862ec7SPhilipp Reisner 	conn_info(tconn, "asender terminated\n");
5393b411b363SPhilipp Reisner 
5394b411b363SPhilipp Reisner 	return 0;
5395b411b363SPhilipp Reisner }
5396