1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_receiver.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner  */
24b411b363SPhilipp Reisner 
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner 
28b411b363SPhilipp Reisner #include <asm/uaccess.h>
29b411b363SPhilipp Reisner #include <net/sock.h>
30b411b363SPhilipp Reisner 
31b411b363SPhilipp Reisner #include <linux/drbd.h>
32b411b363SPhilipp Reisner #include <linux/fs.h>
33b411b363SPhilipp Reisner #include <linux/file.h>
34b411b363SPhilipp Reisner #include <linux/in.h>
35b411b363SPhilipp Reisner #include <linux/mm.h>
36b411b363SPhilipp Reisner #include <linux/memcontrol.h>
37b411b363SPhilipp Reisner #include <linux/mm_inline.h>
38b411b363SPhilipp Reisner #include <linux/slab.h>
39b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
40b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
41b411b363SPhilipp Reisner #include <linux/unistd.h>
42b411b363SPhilipp Reisner #include <linux/vmalloc.h>
43b411b363SPhilipp Reisner #include <linux/random.h>
44b411b363SPhilipp Reisner #include <linux/string.h>
45b411b363SPhilipp Reisner #include <linux/scatterlist.h>
46b411b363SPhilipp Reisner #include "drbd_int.h"
47a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
48b411b363SPhilipp Reisner #include "drbd_req.h"
49b411b363SPhilipp Reisner #include "drbd_vli.h"
50b411b363SPhilipp Reisner 
5120c68fdeSLars Ellenberg #define PRO_FEATURES (FF_TRIM)
5220c68fdeSLars Ellenberg 
/* Decoded header of one received packet, handed to the per-packet
 * receive handlers. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type */
	unsigned int size;	/* payload size */
	unsigned int vnr;	/* volume number this packet addresses (presumably; set by header decode) */
	void *data;		/* NOTE(review): looks like it points into the receive buffer -- confirm at the fill site */
};
5977351055SPhilipp Reisner 
/* Possible outcomes when an epoch may be finished; returned by
 * drbd_may_finish_epoch() (declared below, defined later in this file). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
65b411b363SPhilipp Reisner 
66bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
67bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
6869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
69a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
70bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
7199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
72b411b363SPhilipp Reisner 
73b411b363SPhilipp Reisner 
74b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
75b411b363SPhilipp Reisner 
7645bb912bSLars Ellenberg /*
7745bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
7845bb912bSLars Ellenberg  * page->private being our "next" pointer.
7945bb912bSLars Ellenberg  */
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
8245bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
8345bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
8445bb912bSLars Ellenberg  */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk forward to the n-th page: "page" ends up on the last page of
	 * the span to be taken, "tmp" on its successor (the new list head). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
11545bb912bSLars Ellenberg 
11645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
11745bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
11845bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	/* follow the page->private links until the end-of-chain marker */
	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
12945bb912bSLars Ellenberg 
13045bb912bSLars Ellenberg static int page_chain_free(struct page *page)
13145bb912bSLars Ellenberg {
13245bb912bSLars Ellenberg 	struct page *tmp;
13345bb912bSLars Ellenberg 	int i = 0;
13445bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
13545bb912bSLars Ellenberg 		put_page(page);
13645bb912bSLars Ellenberg 		++i;
13745bb912bSLars Ellenberg 	}
13845bb912bSLars Ellenberg 	return i;
13945bb912bSLars Ellenberg }
14045bb912bSLars Ellenberg 
14145bb912bSLars Ellenberg static void page_chain_add(struct page **head,
14245bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
14345bb912bSLars Ellenberg {
14445bb912bSLars Ellenberg #if 1
14545bb912bSLars Ellenberg 	struct page *tmp;
14645bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14745bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14845bb912bSLars Ellenberg #endif
14945bb912bSLars Ellenberg 
15045bb912bSLars Ellenberg 	/* add chain to head */
15145bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
15245bb912bSLars Ellenberg 	*head = chain_first;
15345bb912bSLars Ellenberg }
15445bb912bSLars Ellenberg 
/* Allocate a chain of @number pages: first try the pre-allocated global
 * pool (drbd_pp_pool), then fall back to alloc_page().  Returns the chain
 * head, or NULL if not all @number pages could be obtained; a partial
 * allocation is given back to the pool rather than returned. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the fresh page in front of the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
200b411b363SPhilipp Reisner 
/* Move the leading peer requests of device->net_ee whose pages are no
 * longer in flight onto @to_be_freed.
 * Caller must hold resource->req_lock (list is modified in place). */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
217b411b363SPhilipp Reisner 
218668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
219b411b363SPhilipp Reisner {
220b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
221db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
222b411b363SPhilipp Reisner 
2230500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
224b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2250500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
226a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
227b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
228b411b363SPhilipp Reisner }
229b411b363SPhilipp Reisner 
/* Reclaim network peer requests on every volume of @connection.
 * The RCU read lock protecting the idr walk is dropped around the
 * (possibly sleeping) per-device reclaim; a kref on the device keeps it
 * alive across that window. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;	/* nothing lent to the network stack */

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
249668700b4SPhilipp Reisner 
250b411b363SPhilipp Reisner /**
251c37c8ecfSAndreas Gruenbacher  * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
252b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
25345bb912bSLars Ellenberg  * @number:	number of pages requested
25445bb912bSLars Ellenberg  * @retry:	whether to retry, if not enough pages are available right now
255b411b363SPhilipp Reisner  *
25645bb912bSLars Ellenberg  * Tries to allocate number pages, first from our own page pool, then from
2570e49d7b0SLars Ellenberg  * the kernel.
25845bb912bSLars Ellenberg  * Possibly retry until DRBD frees sufficient pages somewhere else.
25945bb912bSLars Ellenberg  *
2600e49d7b0SLars Ellenberg  * If this allocation would exceed the max_buffers setting, we throttle
2610e49d7b0SLars Ellenberg  * allocation (schedule_timeout) to give the system some room to breathe.
2620e49d7b0SLars Ellenberg  *
2630e49d7b0SLars Ellenberg  * We do not use max-buffers as hard limit, because it could lead to
2640e49d7b0SLars Ellenberg  * congestion and further to a distributed deadlock during online-verify or
2650e49d7b0SLars Ellenberg  * (checksum based) resync, if the max-buffers, socket buffer sizes and
2660e49d7b0SLars Ellenberg  * resync-rate settings are mis-configured.
2670e49d7b0SLars Ellenberg  *
26845bb912bSLars Ellenberg  * Returns a page chain linked via page->private.
269b411b363SPhilipp Reisner  */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;	/* no net_conf: effectively unlimited */
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* Waited the full timeout without progress: from now on
		 * ignore max-buffers; it is a soft limit (see comment above). */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
320b411b363SPhilipp Reisner 
321c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3220500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
32345bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
32445bb912bSLars Ellenberg  * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* credit the pages back against the counter they were charged to */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* If the global pool already holds plenty, return the pages to the
	 * system; otherwise re-link the whole chain into the pool. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
349b411b363SPhilipp Reisner 
350b411b363SPhilipp Reisner /*
351b411b363SPhilipp Reisner You need to hold the req_lock:
352b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
353b411b363SPhilipp Reisner 
354b411b363SPhilipp Reisner You must not have the req_lock:
3553967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3560db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3577721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
358b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
359a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
360b411b363SPhilipp Reisner  drbd_clear_done_ee()
361b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
362b411b363SPhilipp Reisner */
363b411b363SPhilipp Reisner 
/* Allocate a peer request and, if @has_payload and @data_size > 0, a page
 * chain large enough for @data_size bytes.  May block inside
 * drbd_alloc_pages() when gfp_mask allows blocking.  Returns NULL on
 * allocation failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;	/* round up */

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
410b411b363SPhilipp Reisner 
/* Free a peer request, its optional digest, and its page chain; @is_net
 * selects which in-use counter the pages are credited back to
 * (pp_in_use_by_net vs pp_in_use).  May sleep. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* The AL reference should have been released before we get here;
	 * complete it only as a fallback, complaining via expect(). */
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}
426b411b363SPhilipp Reisner 
427b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
428b411b363SPhilipp Reisner {
429b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
430db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
431b411b363SPhilipp Reisner 	int count = 0;
432b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
433b411b363SPhilipp Reisner 
4340500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
435b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4360500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
437b411b363SPhilipp Reisner 
438a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
439b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
440b411b363SPhilipp Reisner 		count++;
441b411b363SPhilipp Reisner 	}
442b411b363SPhilipp Reisner 	return count;
443b411b363SPhilipp Reisner }
444b411b363SPhilipp Reisner 
/*
 * Reclaim finished net_ee requests, then run the completion callback of
 * every entry on done_ee.  Returns 0, or the first callback error.
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* remember only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
480b411b363SPhilipp Reisner 
/* Wait until @head becomes empty.  Caller must hold resource->req_lock;
 * it is dropped while sleeping and re-acquired before each re-check and
 * before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
496b411b363SPhilipp Reisner 
497b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device,
498d4da1537SAndreas Gruenbacher 				    struct list_head *head)
499b411b363SPhilipp Reisner {
5000500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
501b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, head);
5020500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
503b411b363SPhilipp Reisner }
504b411b363SPhilipp Reisner 
505dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
506b411b363SPhilipp Reisner {
507b411b363SPhilipp Reisner 	struct kvec iov = {
508b411b363SPhilipp Reisner 		.iov_base = buf,
509b411b363SPhilipp Reisner 		.iov_len = size,
510b411b363SPhilipp Reisner 	};
511b411b363SPhilipp Reisner 	struct msghdr msg = {
512b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
513b411b363SPhilipp Reisner 	};
514f730c848SAl Viro 	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
515b411b363SPhilipp Reisner }
516b411b363SPhilipp Reisner 
/* Receive @size bytes from the connection's data socket into @buf.
 * Returns the byte count from the socket layer, 0 on orderly shutdown,
 * or a negative error; any result short of @size forces C_BROKEN_PIPE. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			/* We already sent the disconnect ourselves: give the
			 * state machine up to ping_timeo to drop below
			 * C_WF_REPORT_PARAMS before logging the shutdown. */
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
549b411b363SPhilipp Reisner 
550bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
551c6967746SAndreas Gruenbacher {
552c6967746SAndreas Gruenbacher 	int err;
553c6967746SAndreas Gruenbacher 
554bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
555c6967746SAndreas Gruenbacher 	if (err != size) {
556c6967746SAndreas Gruenbacher 		if (err >= 0)
557c6967746SAndreas Gruenbacher 			err = -EIO;
558c6967746SAndreas Gruenbacher 	} else
559c6967746SAndreas Gruenbacher 		err = 0;
560c6967746SAndreas Gruenbacher 	return err;
561c6967746SAndreas Gruenbacher }
562c6967746SAndreas Gruenbacher 
563bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
564a5c31904SAndreas Gruenbacher {
565a5c31904SAndreas Gruenbacher 	int err;
566a5c31904SAndreas Gruenbacher 
567bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
568a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5691ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
570a5c31904SAndreas Gruenbacher 	return err;
571a5c31904SAndreas Gruenbacher }
572a5c31904SAndreas Gruenbacher 
5735dbf1673SLars Ellenberg /* quoting tcp(7):
5745dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5755dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5765dbf1673SLars Ellenberg  * This is our wrapper to do so.
5775dbf1673SLars Ellenberg  */
5785dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5795dbf1673SLars Ellenberg 		unsigned int rcv)
5805dbf1673SLars Ellenberg {
5815dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5825dbf1673SLars Ellenberg 	if (snd) {
5835dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5845dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5855dbf1673SLars Ellenberg 	}
5865dbf1673SLars Ellenberg 	if (rcv) {
5875dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5885dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5895dbf1673SLars Ellenberg 	}
5905dbf1673SLars Ellenberg }
5915dbf1673SLars Ellenberg 
/* Actively establish one outgoing TCP connection to the peer.
 * Returns the connected socket, or NULL on failure.  Failures that only
 * mean "peer not reachable (yet)" -- timeouts, refusals, signals -- do
 * not change the connection state; any other error triggers a hard
 * transition to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* net_conf is RCU-protected; copy the few values we need and drop
	 * the read lock before doing anything that might sleep. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* local copies of the configured addresses; sockaddr_in6 is large
	 * enough for either address family */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* buffer sizes must be set before connect(); see drbd_setbufsize() */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		/* classify the error: transient network conditions keep us
		 * trying; anything else is reported and disconnects */
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
679b411b363SPhilipp Reisner 
/* Shared context between prepare_listen_socket(), the listen socket's
 * sk_state_change hook (drbd_incoming_connection()), and
 * drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket */
	struct completion door_bell;	/* completed when a connection reaches TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored later */

};
6877a426fd8SPhilipp Reisner 
688715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk)
689b411b363SPhilipp Reisner {
6907a426fd8SPhilipp Reisner 	struct accept_wait_data *ad = sk->sk_user_data;
691715306f6SAndreas Gruenbacher 	void (*state_change)(struct sock *sk);
6927a426fd8SPhilipp Reisner 
693715306f6SAndreas Gruenbacher 	state_change = ad->original_sk_state_change;
694715306f6SAndreas Gruenbacher 	if (sk->sk_state == TCP_ESTABLISHED)
6957a426fd8SPhilipp Reisner 		complete(&ad->door_bell);
696715306f6SAndreas Gruenbacher 	state_change(sk);
6977a426fd8SPhilipp Reisner }
6987a426fd8SPhilipp Reisner 
/* Create, bind and start listening on the server-side socket, and hook
 * its sk_state_change callback so an incoming connection rings
 * @ad->door_bell.  On success the listen socket is stored in
 * @ad->s_listen and 0 is returned; on any failure -EIO is returned,
 * with a hard transition to C_DISCONNECTING for unexpected errors.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* copy what we need from the RCU-protected net_conf */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	/* buffer sizes must be set before listen(); see drbd_setbufsize() */
	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* divert sk_state_change before listen() so we cannot miss an
	 * early incoming connection */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7611f3e509bSPhilipp Reisner 
762715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
763715306f6SAndreas Gruenbacher {
764715306f6SAndreas Gruenbacher 	write_lock_bh(&sk->sk_callback_lock);
765715306f6SAndreas Gruenbacher 	sk->sk_state_change = ad->original_sk_state_change;
766715306f6SAndreas Gruenbacher 	sk->sk_user_data = NULL;
767715306f6SAndreas Gruenbacher 	write_unlock_bh(&sk->sk_callback_lock);
768715306f6SAndreas Gruenbacher }
769715306f6SAndreas Gruenbacher 
/* Wait (with randomized jitter on the timeout) for an incoming
 * connection on @ad->s_listen and accept it.  Returns the established
 * socket, or NULL on timeout, pending signal, or accept failure.
 */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* the door bell is completed by drbd_incoming_connection() */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited the diverted callback; restore it */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
806b411b363SPhilipp Reisner 
807bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
808b411b363SPhilipp Reisner 
809bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8109f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8119f5bdc33SAndreas Gruenbacher {
812bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8139f5bdc33SAndreas Gruenbacher 		return -EIO;
814bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
815b411b363SPhilipp Reisner }
816b411b363SPhilipp Reisner 
/* Read and decode the first packet on a freshly accepted @sock.
 * Returns the packet command (e.g. P_INITIAL_DATA / P_INITIAL_META)
 * on success, or a negative error code on a short read or a header
 * that fails to decode.
 */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	/* bound the wait for the peer's first packet by 4x ping_timeo */
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;	/* short read */
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
844b411b363SPhilipp Reisner 
845b411b363SPhilipp Reisner /**
846b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
847b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
848b411b363SPhilipp Reisner  */
8495d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
850b411b363SPhilipp Reisner {
851b411b363SPhilipp Reisner 	int rr;
852b411b363SPhilipp Reisner 	char tb[4];
853b411b363SPhilipp Reisner 
854b411b363SPhilipp Reisner 	if (!*sock)
85581e84650SAndreas Gruenbacher 		return false;
856b411b363SPhilipp Reisner 
857dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
858b411b363SPhilipp Reisner 
859b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
86081e84650SAndreas Gruenbacher 		return true;
861b411b363SPhilipp Reisner 	} else {
862b411b363SPhilipp Reisner 		sock_release(*sock);
863b411b363SPhilipp Reisner 		*sock = NULL;
86481e84650SAndreas Gruenbacher 		return false;
865b411b363SPhilipp Reisner 	}
866b411b363SPhilipp Reisner }
8675d0b17f1SPhilipp Reisner 
/* Check whether both the data and the meta socket are up and still
 * alive: if both exist, sleep sock_check_timeo (falling back to
 * ping_timeo) and then probe both with drbd_socket_okay().  Dead
 * sockets are released and NULLed by that probe. */
static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	/* NOTE(review): nc is dereferenced without a NULL check here,
	 * unlike in drbd_try_connect() -- presumably net_conf cannot go
	 * away while a connect attempt is in flight; confirm. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}
8905d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* reset per-connection sequence counters */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* peers older than protocol 100 share one connection-wide state
	 * mutex; newer peers get a per-device one */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* initial handshake: send sync params, sizes, uuids and current
	 * state; the first failure short-circuits the rest */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
918b411b363SPhilipp Reisner 
/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* two sockets per connection: sock carries data, msock carries meta */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* alternate between actively connecting out and accepting incoming
	 * connections until both sockets are established */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			/* the peer's first packet tells us which role the
			 * accepted socket is meant to play */
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* 50/50 coin flip: either keep what we have
				 * or go back to waiting */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* both sockets up: stop listening */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-volume setup; rcu is dropped around the drbd_connected()
	 * call while a kref pins the device */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	connection->ack_sender = create_singlethread_workqueue("drbd_ack_sender");
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1152b411b363SPhilipp Reisner 
1153bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1154b411b363SPhilipp Reisner {
1155bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1156b411b363SPhilipp Reisner 
11570c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11580c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11590c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11600c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11611ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11620c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116302918be2SPhilipp Reisner 		}
11640c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11650c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11660c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11670c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1168e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1169e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1170e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1171b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1172eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1173e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1174e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1175e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1176e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1177e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
117877351055SPhilipp Reisner 		pi->vnr = 0;
117902918be2SPhilipp Reisner 	} else {
11801ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1181e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1182bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11838172f3e9SAndreas Gruenbacher 		return -EINVAL;
1184b411b363SPhilipp Reisner 	}
1185e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11868172f3e9SAndreas Gruenbacher 	return 0;
1187b411b363SPhilipp Reisner }
1188b411b363SPhilipp Reisner 
1189bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1190257d0af6SPhilipp Reisner {
1191bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
119269bc7bc3SAndreas Gruenbacher 	int err;
1193257d0af6SPhilipp Reisner 
1194bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1195a5c31904SAndreas Gruenbacher 	if (err)
119669bc7bc3SAndreas Gruenbacher 		return err;
1197257d0af6SPhilipp Reisner 
1198bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1199bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1200b411b363SPhilipp Reisner 
120169bc7bc3SAndreas Gruenbacher 	return err;
1202b411b363SPhilipp Reisner }
1203b411b363SPhilipp Reisner 
/* Issue a cache flush to the backing device of every volume on this
 * connection that currently has a local disk attached, provided the
 * resource's write ordering method is at least WO_BDEV_FLUSH.
 * A failing flush demotes the whole resource to WO_DRAIN_IO and stops
 * flushing further volumes. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			/* Pin the device and drop the RCU read lock:
			 * blkdev_issue_flush() may sleep. */
			kref_get(&device->kref);
			rcu_read_unlock();

			/* Right now, we have only this one synchronous code path
			 * for flushes between request epochs.
			 * We may want to make those asynchronous,
			 * or at least parallelize the flushes to the volume devices.
			 */
			device->flush_jif = jiffies;
			set_bit(FLUSH_PENDING, &device->flags);
			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			clear_bit(FLUSH_PENDING, &device->flags);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			/* Re-acquire RCU for the next idr iteration step. */
			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1247b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection the epoch belongs to.
 * @epoch:	Epoch object.
 * @ev:		Epoch event (may have EV_CLEANUP or'ed in).
 *
 * An epoch may finish once it has seen at least one write (epoch_size != 0),
 * has no active requests left, and either knows its barrier number or we are
 * cleaning up (EV_CLEANUP).  Destroying one epoch can allow its successor in
 * the list to finish as well, hence the loop.
 *
 * Returns FE_STILL_LIVE, or FE_DESTROYED / FE_RECYCLED if at least one
 * epoch was finished.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* First record the event itself on this epoch. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* Drop the spinlock while sending the barrier ack;
				 * re-take it before touching the list again. */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* A finished non-current epoch is unlinked and freed;
				 * continue with its successor next iteration. */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* The current epoch is reset in place for reuse. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1323b411b363SPhilipp Reisner 
13248fe39aacSPhilipp Reisner static enum write_ordering_e
13258fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
13268fe39aacSPhilipp Reisner {
13278fe39aacSPhilipp Reisner 	struct disk_conf *dc;
13288fe39aacSPhilipp Reisner 
13298fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
13308fe39aacSPhilipp Reisner 
1331f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1332f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1333f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1334f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
13358fe39aacSPhilipp Reisner 
13368fe39aacSPhilipp Reisner 	return wo;
13378fe39aacSPhilipp Reisner }
13388fe39aacSPhilipp Reisner 
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	backing device considered in addition to the devices attached
 *		to @resource, or NULL.
 * @wo:		Write ordering method to try.
 *
 * The effective method is limited by the current method and by what every
 * backing device's configuration allows (see max_allowed_wo()) -- except
 * that trying WO_BDEV_FLUSH may raise the method again rather than only
 * lower it.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* Only WO_BDEV_FLUSH is allowed to raise the method above the
	 * currently effective one; anything else can only lower it. */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* If @bdev is already attached, avoid considering it twice. */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1378b411b363SPhilipp Reisner 
/**
 * drbd_submit_peer_request() - submit the bios for a peer (write/resync) request
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 * @fault_type:	DRBD fault injection category, forwarded to
 *		drbd_generic_make_request() for each bio
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* Trims that are emulated by zeroing out take a completely separate,
	 * synchronous path; they never build a bio chain here. */
	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, data_size >> 9, GFP_NOIO, false))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* Collect the bios in a singly linked chain until all pages fit. */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		/* Discard: set the size directly, no pages to add. */
		bio->bi_iter.bi_size = data_size;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* This bio is full; start another one for the rest. */
			goto next_bio;
		}
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* Nothing was submitted yet; release the whole chain. */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
150845bb912bSLars Ellenberg 
1509b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1510db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
151153840641SAndreas Gruenbacher {
1512db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
151353840641SAndreas Gruenbacher 
1514b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
151553840641SAndreas Gruenbacher 	drbd_clear_interval(i);
151653840641SAndreas Gruenbacher 
15176c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
151853840641SAndreas Gruenbacher 	if (i->waiting)
1519b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
152053840641SAndreas Gruenbacher }
152153840641SAndreas Gruenbacher 
/* Block until the active_ee list of every volume on @connection is empty,
 * i.e. all peer requests currently on those lists have completed. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* Pin the device and drop the RCU read lock: the wait sleeps. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		/* Re-acquire RCU for the next idr iteration step. */
		rcu_read_lock();
	}
	rcu_read_unlock();
}
153977fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch and, depending on the write ordering method, wait for /
 * flush local IO before installing a fresh epoch.  The barrier ack itself
 * is only sent once the epoch's writes have completed (see
 * drbd_may_finish_epoch()).  Returns 0, or -EIO on an invalid write
 * ordering state. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* Synchronous path: wait for all in-flight peer writes, then
		 * flush the backing devices, before starting a new epoch. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		/* Writes arrived meanwhile: chain in the freshly allocated epoch. */
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1607b411b363SPhilipp Reisner 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receive one write payload (or a trim request, which carries no payload)
 * into a newly allocated peer request.  If a peer data integrity transform
 * is configured, the digest sent ahead of the data is received and verified.
 * Returns the peer request, or NULL on any error (receive failure,
 * allocation failure, digest mismatch, or a request beyond the end of the
 * local disk). */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		/* The digest was counted in pi->size; the rest is payload. */
		data_size -= digest_size;
	}

	if (trim) {
		/* A trim carries its size in the packet itself, not as payload. */
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim)
		return peer_req;

	/* Receive the payload page by page into the request's page chain. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (digest_size) {
		/* Recompute the digest over what we actually received and
		 * compare it against what the peer sent. */
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1700b411b363SPhilipp Reisner 
1701b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1702b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1703b411b363SPhilipp Reisner  */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	/* Nothing on the wire for this packet; nothing to do. */
	if (!data_size)
		return 0;

	/* NOTE(review): drbd_alloc_pages() looks like it can return NULL
	 * (e.g. if interrupted while waiting); kmap(NULL) below would then
	 * oops -- confirm allocation cannot fail on this path. */
	page = drbd_alloc_pages(peer_device, 1, 1);

	/* Receive the payload one PAGE_SIZE chunk at a time into the same
	 * scratch page, overwriting it each round; the data is discarded. */
	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}
1728b411b363SPhilipp Reisner 
/* Receive the payload of a data packet directly into the master bio of a
 * pending ("disk-less") read request @req, optionally verifying the
 * peer-integrity digest that precedes the payload on the wire.
 * Returns 0 on success, a negative error code otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* If integrity checking is configured, the digest travels in front
	 * of the payload: receive it first and subtract it from data_size. */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* Fill the bio segment by segment straight from the socket. */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	/* Recompute the digest over what we actually received and compare
	 * against what the peer sent. */
	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* The bio must have been exactly as large as the payload. */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1776b411b363SPhilipp Reisner 
1777a990be46SAndreas Gruenbacher /*
1778668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
1779a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
1780a990be46SAndreas Gruenbacher  */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* Resync requests never take part in write-conflict detection,
	 * so the interval must not be in the tree here. */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* Local write succeeded: mark the range in sync, then ack. */
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* Matches the inc_unacked() in recv_resync_read(). */
	dec_unacked(device);

	return err;
}
1805b411b363SPhilipp Reisner 
/* Read one resync data block off the wire and submit it as a local write.
 * On success the request is in flight; completion runs asynchronously
 * (e_end_resync_block via the sync_ee list, or drbd_peer_request_endio).
 * Returns 0 on success, -EIO otherwise.  The caller's ldev reference is
 * consumed: kept by the in-flight request on success, dropped here on
 * failure (hence __releases(local)). */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	/* Make the request visible on sync_ee before submitting it. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* Account the incoming sectors as resync events (rate control). */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1844b411b363SPhilipp Reisner 
/* Resolve the request object the peer referenced by @id.
 * @id is the pointer value we handed to the peer earlier; before trusting
 * it, the interval lookup in @root presumably confirms the object is a
 * known pending request covering @sector (verify in drbd_interval.c).
 * Returns the request, or NULL (logging an error unless @missing_ok). */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1861668eebc6SAndreas Gruenbacher 
/* Handle a P_DATA_REPLY packet: the payload for a read request we sent to
 * the peer (disk-less read).  Locates our pending request and receives the
 * data directly into its bio. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* Validate the peer-supplied block_id under the request lock. */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1896b411b363SPhilipp Reisner 
/* Handle a P_RS_DATA_REPLY packet: resync data requested from the peer.
 * With a usable local disk the block is written via recv_resync_read();
 * otherwise the payload is drained from the socket and negatively acked. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* Resync replies always carry the ID_SYNCER magic block id. */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* Account received resync sectors for resync-rate control. */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
1931b411b363SPhilipp Reisner 
/* Requeue local write requests that were postponed because they conflicted
 * with the peer request covering [sector, sector+size).  Walks the overlap
 * set in the write_requests interval tree; only requests that are marked
 * RQ_POSTPONED and no longer locally pending are restarted.
 * NOTE(review): callers appear to hold resource->req_lock (see e_end_block);
 * confirm before calling from elsewhere. */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
19507be8da07SAndreas Gruenbacher 
1951a990be46SAndreas Gruenbacher /*
1952668700b4SPhilipp Reisner  * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
1953b411b363SPhilipp Reisner  */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_req->peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync states the write may double as a
			 * resync ack, which also marks the range in sync. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		/* Matches the inc_unacked() done when the write was received. */
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1996b411b363SPhilipp Reisner 
/* Common helper for the done_ee callbacks below: send @ack for the peer
 * request wrapped in @w, then drop the corresponding unacked count. */
static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}
2009b411b363SPhilipp Reisner 
/* Ack callback: tell the peer its conflicting write was superseded. */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2014b6a370baSPhilipp Reisner 
/* Ack callback: ask the peer to retry a conflicting write.
 * P_RETRY_WRITE only exists from protocol version 100 on; older peers
 * are sent P_SUPERSEDED instead. */
static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}
20247be8da07SAndreas Gruenbacher 
20253e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
20263e394da1SAndreas Gruenbacher {
20273e394da1SAndreas Gruenbacher 	/*
20283e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
20293e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
20303e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
20313e394da1SAndreas Gruenbacher 	 */
20323e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
20333e394da1SAndreas Gruenbacher }
20343e394da1SAndreas Gruenbacher 
20353e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
20363e394da1SAndreas Gruenbacher {
20373e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
20383e394da1SAndreas Gruenbacher }
20393e394da1SAndreas Gruenbacher 
/* Fold a sequence number just received from the peer into device->peer_seq,
 * keeping the newer of the two (with 32-bit wrap handling via seq_max).
 * Only relevant when conflict resolution is active (dual-primary). */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
20553e394da1SAndreas Gruenbacher 
2056d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2057d93f6302SLars Ellenberg {
2058d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2059d93f6302SLars Ellenberg }
2060d93f6302SLars Ellenberg 
2061d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
2062b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2063d93f6302SLars Ellenberg {
2064d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
2065b6a370baSPhilipp Reisner 	bool rv = 0;
2066b6a370baSPhilipp Reisner 
20670500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2068a8cd15baSAndreas Gruenbacher 	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2069d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
2070d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
2071b6a370baSPhilipp Reisner 			rv = 1;
2072b6a370baSPhilipp Reisner 			break;
2073b6a370baSPhilipp Reisner 		}
2074b6a370baSPhilipp Reisner 	}
20750500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2076b6a370baSPhilipp Reisner 
2077b6a370baSPhilipp Reisner 	return rv;
2078b6a370baSPhilipp Reisner }
2079b6a370baSPhilipp Reisner 
2080b411b363SPhilipp Reisner /* Called from receive_Data.
2081b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2082b411b363SPhilipp Reisner  *
2083b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2084b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2085b411b363SPhilipp Reisner  * been sent.
2086b411b363SPhilipp Reisner  *
2087b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2088b411b363SPhilipp Reisner  *
2089b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2090b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2091b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2092b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2093b411b363SPhilipp Reisner  *
2094b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2095b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2096b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2097b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2098b411b363SPhilipp Reisner  *
2099b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2100b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Without conflict resolution, ordering does not matter here. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* peer_seq - 1 <= device->peer_seq means every packet the
		 * peer sent before this one has arrived: we may proceed and
		 * record the new maximum. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* Drop the lock while sleeping; update_peer_seq() wakes us. */
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2148b411b363SPhilipp Reisner 
2149688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2150688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2151688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
215281f0ffd2SAndreas Gruenbacher static unsigned long wire_flags_to_bio(u32 dpf)
215376d2e7ecSPhilipp Reisner {
215476d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
215576d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
2156688593c5SLars Ellenberg 		(dpf & DP_FLUSH ? REQ_FLUSH : 0) |
215776d2e7ecSPhilipp Reisner 		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
215876d2e7ecSPhilipp Reisner }
215976d2e7ecSPhilipp Reisner 
/* Fail every postponed local write overlapping [sector, sector+size).
 * Called with resource->req_lock held; the lock is dropped around
 * complete_master_bio() and reacquired, which invalidates the overlap
 * iteration -- hence the restart-from-scratch via "goto repeat". */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
21847be8da07SAndreas Gruenbacher 
2185b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device,
21867be8da07SAndreas Gruenbacher 				  struct drbd_peer_request *peer_req)
21877be8da07SAndreas Gruenbacher {
2188e33b32deSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
2189bde89a9eSAndreas Gruenbacher 	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
21907be8da07SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
21917be8da07SAndreas Gruenbacher 	const unsigned int size = peer_req->i.size;
21927be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
21937be8da07SAndreas Gruenbacher 	bool equal;
21947be8da07SAndreas Gruenbacher 	int err;
21957be8da07SAndreas Gruenbacher 
21967be8da07SAndreas Gruenbacher 	/*
21977be8da07SAndreas Gruenbacher 	 * Inserting the peer request into the write_requests tree will prevent
21987be8da07SAndreas Gruenbacher 	 * new conflicting local requests from being added.
21997be8da07SAndreas Gruenbacher 	 */
2200b30ab791SAndreas Gruenbacher 	drbd_insert_interval(&device->write_requests, &peer_req->i);
22017be8da07SAndreas Gruenbacher 
22027be8da07SAndreas Gruenbacher     repeat:
2203b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
22047be8da07SAndreas Gruenbacher 		if (i == &peer_req->i)
22057be8da07SAndreas Gruenbacher 			continue;
220608d0dabfSLars Ellenberg 		if (i->completed)
220708d0dabfSLars Ellenberg 			continue;
22087be8da07SAndreas Gruenbacher 
22097be8da07SAndreas Gruenbacher 		if (!i->local) {
22107be8da07SAndreas Gruenbacher 			/*
22117be8da07SAndreas Gruenbacher 			 * Our peer has sent a conflicting remote request; this
22127be8da07SAndreas Gruenbacher 			 * should not happen in a two-node setup.  Wait for the
22137be8da07SAndreas Gruenbacher 			 * earlier peer request to complete.
22147be8da07SAndreas Gruenbacher 			 */
2215b30ab791SAndreas Gruenbacher 			err = drbd_wait_misc(device, i);
22167be8da07SAndreas Gruenbacher 			if (err)
22177be8da07SAndreas Gruenbacher 				goto out;
22187be8da07SAndreas Gruenbacher 			goto repeat;
22197be8da07SAndreas Gruenbacher 		}
22207be8da07SAndreas Gruenbacher 
22217be8da07SAndreas Gruenbacher 		equal = i->sector == sector && i->size == size;
22227be8da07SAndreas Gruenbacher 		if (resolve_conflicts) {
22237be8da07SAndreas Gruenbacher 			/*
22247be8da07SAndreas Gruenbacher 			 * If the peer request is fully contained within the
2225d4dabbe2SLars Ellenberg 			 * overlapping request, it can be considered overwritten
2226d4dabbe2SLars Ellenberg 			 * and thus superseded; otherwise, it will be retried
2227d4dabbe2SLars Ellenberg 			 * once all overlapping requests have completed.
22287be8da07SAndreas Gruenbacher 			 */
2229d4dabbe2SLars Ellenberg 			bool superseded = i->sector <= sector && i->sector +
22307be8da07SAndreas Gruenbacher 				       (i->size >> 9) >= sector + (size >> 9);
22317be8da07SAndreas Gruenbacher 
22327be8da07SAndreas Gruenbacher 			if (!equal)
2233d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
22347be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u, "
22357be8da07SAndreas Gruenbacher 					       "assuming %s came first\n",
22367be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
22377be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size,
2238d4dabbe2SLars Ellenberg 					  superseded ? "local" : "remote");
22397be8da07SAndreas Gruenbacher 
2240a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = superseded ? e_send_superseded :
22417be8da07SAndreas Gruenbacher 						   e_send_retry_write;
2242a8cd15baSAndreas Gruenbacher 			list_add_tail(&peer_req->w.list, &device->done_ee);
2243668700b4SPhilipp Reisner 			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
22447be8da07SAndreas Gruenbacher 
22457be8da07SAndreas Gruenbacher 			err = -ENOENT;
22467be8da07SAndreas Gruenbacher 			goto out;
22477be8da07SAndreas Gruenbacher 		} else {
22487be8da07SAndreas Gruenbacher 			struct drbd_request *req =
22497be8da07SAndreas Gruenbacher 				container_of(i, struct drbd_request, i);
22507be8da07SAndreas Gruenbacher 
22517be8da07SAndreas Gruenbacher 			if (!equal)
2252d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
22537be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u\n",
22547be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
22557be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size);
22567be8da07SAndreas Gruenbacher 
22577be8da07SAndreas Gruenbacher 			if (req->rq_state & RQ_LOCAL_PENDING ||
22587be8da07SAndreas Gruenbacher 			    !(req->rq_state & RQ_POSTPONED)) {
22597be8da07SAndreas Gruenbacher 				/*
22607be8da07SAndreas Gruenbacher 				 * Wait for the node with the discard flag to
2261d4dabbe2SLars Ellenberg 				 * decide if this request has been superseded
2262d4dabbe2SLars Ellenberg 				 * or needs to be retried.
2263d4dabbe2SLars Ellenberg 				 * Requests that have been superseded will
22647be8da07SAndreas Gruenbacher 				 * disappear from the write_requests tree.
22657be8da07SAndreas Gruenbacher 				 *
22667be8da07SAndreas Gruenbacher 				 * In addition, wait for the conflicting
22677be8da07SAndreas Gruenbacher 				 * request to finish locally before submitting
22687be8da07SAndreas Gruenbacher 				 * the conflicting peer request.
22697be8da07SAndreas Gruenbacher 				 */
2270b30ab791SAndreas Gruenbacher 				err = drbd_wait_misc(device, &req->i);
22717be8da07SAndreas Gruenbacher 				if (err) {
2272e33b32deSAndreas Gruenbacher 					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
2273b30ab791SAndreas Gruenbacher 					fail_postponed_requests(device, sector, size);
22747be8da07SAndreas Gruenbacher 					goto out;
22757be8da07SAndreas Gruenbacher 				}
22767be8da07SAndreas Gruenbacher 				goto repeat;
22777be8da07SAndreas Gruenbacher 			}
22787be8da07SAndreas Gruenbacher 			/*
22797be8da07SAndreas Gruenbacher 			 * Remember to restart the conflicting requests after
22807be8da07SAndreas Gruenbacher 			 * the new peer request has completed.
22817be8da07SAndreas Gruenbacher 			 */
22827be8da07SAndreas Gruenbacher 			peer_req->flags |= EE_RESTART_REQUESTS;
22837be8da07SAndreas Gruenbacher 		}
22847be8da07SAndreas Gruenbacher 	}
22857be8da07SAndreas Gruenbacher 	err = 0;
22867be8da07SAndreas Gruenbacher 
22877be8da07SAndreas Gruenbacher     out:
22887be8da07SAndreas Gruenbacher 	if (err)
2289b30ab791SAndreas Gruenbacher 		drbd_remove_epoch_entry_interval(device, peer_req);
22907be8da07SAndreas Gruenbacher 	return err;
22917be8da07SAndreas Gruenbacher }
22927be8da07SAndreas Gruenbacher 
/* mirrored write: receive a P_DATA (or P_TRIM) packet from the peer and
 * submit the corresponding write to our local backing device.
 * Returns 0 on success, a negative error code to trigger re-connect. */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* No local disk: still keep the peer sequence number and epoch
	 * accounting consistent, send a negative ack, and drain the data
	 * payload from the socket so the stream stays in sync. */
	if (!get_ldev(device)) {
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(dp_flags);
	if (pi->cmd == P_TRIM) {
		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
		peer_req->flags |= EE_IS_TRIM;
		/* backend cannot discard: fall back to writing zeroes */
		if (!blk_queue_discard(q))
			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, rw & REQ_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
	} else if (peer_req->pages == NULL) {
		/* zero-sized request: only valid as an explicit flush */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Account this request in the currently open write epoch. */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	/* Peers before protocol version 100 do not encode the desired ack
	 * mode in dp_flags; derive it from the configured wire protocol. */
	if (peer_device->connection->agreed_pro_version < 100) {
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			/* -ENOENT: request was superseded by a concurrent
			 * conflicting write and already answered; done. */
			if (err == -ENOENT) {
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* if we use the zeroout fallback code, we process synchronously
	 * and we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* As sync target, never write concurrently to a region a resync
	 * write is still in flight for. */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2454b411b363SPhilipp Reisner 
24550f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
24560f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
24570f0601f4SLars Ellenberg  *
24580f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
24590f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
24600f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
24610f0601f4SLars Ellenberg  * activity, it obviously is "busy".
24620f0601f4SLars Ellenberg  *
24630f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
24640f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
24650f0601f4SLars Ellenberg  */
2466ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2467ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2468e8299874SLars Ellenberg {
2469e8299874SLars Ellenberg 	struct lc_element *tmp;
2470ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2471e8299874SLars Ellenberg 
2472ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2473ad3fee79SLars Ellenberg 		return throttle;
2474e8299874SLars Ellenberg 
2475e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2476e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2477e8299874SLars Ellenberg 	if (tmp) {
2478e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2479e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2480e8299874SLars Ellenberg 			throttle = false;
2481ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2482ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2483e8299874SLars Ellenberg 	}
2484e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2485e8299874SLars Ellenberg 
2486e8299874SLars Ellenberg 	return throttle;
2487e8299874SLars Ellenberg }
2488e8299874SLars Ellenberg 
2489e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
24900f0601f4SLars Ellenberg {
2491b30ab791SAndreas Gruenbacher 	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
24920f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2493daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2494e8299874SLars Ellenberg 	int curr_events;
2495daeda1ccSPhilipp Reisner 
2496daeda1ccSPhilipp Reisner 	rcu_read_lock();
2497b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2498daeda1ccSPhilipp Reisner 	rcu_read_unlock();
24990f0601f4SLars Ellenberg 
25000f0601f4SLars Ellenberg 	/* feature disabled? */
2501daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2502e8299874SLars Ellenberg 		return false;
2503e3555d85SPhilipp Reisner 
25040f0601f4SLars Ellenberg 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
25050f0601f4SLars Ellenberg 		      (int)part_stat_read(&disk->part0, sectors[1]) -
2506b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2507ad3fee79SLars Ellenberg 
2508ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2509ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
25100f0601f4SLars Ellenberg 		unsigned long rs_left;
25110f0601f4SLars Ellenberg 		int i;
25120f0601f4SLars Ellenberg 
2513b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
25140f0601f4SLars Ellenberg 
25150f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
25160f0601f4SLars Ellenberg 		 * approx. */
2517b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
25182649f080SLars Ellenberg 
2519b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2520b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
25212649f080SLars Ellenberg 		else
2522b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
25230f0601f4SLars Ellenberg 
2524b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
25250f0601f4SLars Ellenberg 		if (!dt)
25260f0601f4SLars Ellenberg 			dt++;
2527b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
25280f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
25290f0601f4SLars Ellenberg 
2530daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2531e8299874SLars Ellenberg 			return true;
25320f0601f4SLars Ellenberg 	}
2533e8299874SLars Ellenberg 	return false;
25340f0601f4SLars Ellenberg }
25350f0601f4SLars Ellenberg 
/* Serve a peer read request: P_DATA_REQUEST (application read from peer),
 * P_RS_DATA_REQUEST / P_CSUM_RS_REQUEST (resync), P_OV_REQUEST / P_OV_REPLY
 * (online verify).  Returns 0 on success, negative errno to re-connect. */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Sanity-check the untrusted on-wire request: sane size ... */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* ... and within the device. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: send the matching negative reply for each
	 * request type, then drain any payload still on the socket. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Pick the completion callback and fault-injection type per request. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest payload follows the request header on the wire */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first verify request of a run: initialize progress marks */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	&& drbd_rs_should_slow_down(device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2739b411b363SPhilipp Reisner 
274069a22773SAndreas Gruenbacher /**
274169a22773SAndreas Gruenbacher  * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
274269a22773SAndreas Gruenbacher  */
274369a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
2744b411b363SPhilipp Reisner {
274569a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2746b411b363SPhilipp Reisner 	int self, peer, rv = -100;
2747b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
274844ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
2749b411b363SPhilipp Reisner 
2750b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & 1;
2751b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & 1;
2752b411b363SPhilipp Reisner 
2753b30ab791SAndreas Gruenbacher 	ch_peer = device->p_uuid[UI_SIZE];
2754b30ab791SAndreas Gruenbacher 	ch_self = device->comm_bm_set;
2755b411b363SPhilipp Reisner 
275644ed167dSPhilipp Reisner 	rcu_read_lock();
275769a22773SAndreas Gruenbacher 	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
275844ed167dSPhilipp Reisner 	rcu_read_unlock();
275944ed167dSPhilipp Reisner 	switch (after_sb_0p) {
2760b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2761b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2762b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
276344ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
2764d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2765b411b363SPhilipp Reisner 		break;
2766b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2767b411b363SPhilipp Reisner 		break;
2768b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2769b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2770b411b363SPhilipp Reisner 			rv = -1;
2771b411b363SPhilipp Reisner 			break;
2772b411b363SPhilipp Reisner 		}
2773b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2774b411b363SPhilipp Reisner 			rv =  1;
2775b411b363SPhilipp Reisner 			break;
2776b411b363SPhilipp Reisner 		}
2777b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2778b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2779b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2780b411b363SPhilipp Reisner 			rv = 1;
2781b411b363SPhilipp Reisner 			break;
2782b411b363SPhilipp Reisner 		}
2783b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2784b411b363SPhilipp Reisner 			rv = -1;
2785b411b363SPhilipp Reisner 			break;
2786b411b363SPhilipp Reisner 		}
2787b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2788d0180171SAndreas Gruenbacher 		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
2789b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
2790b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2791b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
279269a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2793b411b363SPhilipp Reisner 				? -1 : 1;
2794b411b363SPhilipp Reisner 			break;
2795b411b363SPhilipp Reisner 		} else {
2796b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
2797b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
2798b411b363SPhilipp Reisner 		}
279944ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
2800b411b363SPhilipp Reisner 			break;
2801b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2802b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
2803b411b363SPhilipp Reisner 			rv = -1;
2804b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
2805b411b363SPhilipp Reisner 			rv =  1;
2806b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
2807b411b363SPhilipp Reisner 		     /* Well, then use something else. */
280869a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2809b411b363SPhilipp Reisner 				? -1 : 1;
2810b411b363SPhilipp Reisner 		break;
2811b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2812b411b363SPhilipp Reisner 		rv = -1;
2813b411b363SPhilipp Reisner 		break;
2814b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2815b411b363SPhilipp Reisner 		rv =  1;
2816b411b363SPhilipp Reisner 	}
2817b411b363SPhilipp Reisner 
2818b411b363SPhilipp Reisner 	return rv;
2819b411b363SPhilipp Reisner }
2820b411b363SPhilipp Reisner 
282169a22773SAndreas Gruenbacher /**
282269a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
282369a22773SAndreas Gruenbacher  */
282469a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
2825b411b363SPhilipp Reisner {
282669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
28276184ea21SAndreas Gruenbacher 	int hg, rv = -100;
282844ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
2829b411b363SPhilipp Reisner 
283044ed167dSPhilipp Reisner 	rcu_read_lock();
283169a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
283244ed167dSPhilipp Reisner 	rcu_read_unlock();
283344ed167dSPhilipp Reisner 	switch (after_sb_1p) {
2834b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2835b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2836b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2837b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2838b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
283944ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2840d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2841b411b363SPhilipp Reisner 		break;
2842b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2843b411b363SPhilipp Reisner 		break;
2844b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
284569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2846b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
2847b411b363SPhilipp Reisner 			rv = hg;
2848b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
2849b411b363SPhilipp Reisner 			rv = hg;
2850b411b363SPhilipp Reisner 		break;
2851b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
285269a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
2853b411b363SPhilipp Reisner 		break;
2854b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2855b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
2856b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
285769a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2858b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
2859bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2860bb437946SAndreas Gruenbacher 
2861b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2862b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2863b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2864b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
2865bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2866b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
2867b411b363SPhilipp Reisner 			} else {
2868d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
2869b411b363SPhilipp Reisner 				rv = hg;
2870b411b363SPhilipp Reisner 			}
2871b411b363SPhilipp Reisner 		} else
2872b411b363SPhilipp Reisner 			rv = hg;
2873b411b363SPhilipp Reisner 	}
2874b411b363SPhilipp Reisner 
2875b411b363SPhilipp Reisner 	return rv;
2876b411b363SPhilipp Reisner }
2877b411b363SPhilipp Reisner 
287869a22773SAndreas Gruenbacher /**
287969a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
288069a22773SAndreas Gruenbacher  */
288169a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
2882b411b363SPhilipp Reisner {
288369a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
28846184ea21SAndreas Gruenbacher 	int hg, rv = -100;
288544ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
2886b411b363SPhilipp Reisner 
288744ed167dSPhilipp Reisner 	rcu_read_lock();
288869a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
288944ed167dSPhilipp Reisner 	rcu_read_unlock();
289044ed167dSPhilipp Reisner 	switch (after_sb_2p) {
2891b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2892b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2893b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2894b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2895b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2896b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2897b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
289844ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2899d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2900b411b363SPhilipp Reisner 		break;
2901b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
290269a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
2903b411b363SPhilipp Reisner 		break;
2904b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2905b411b363SPhilipp Reisner 		break;
2906b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
290769a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2908b411b363SPhilipp Reisner 		if (hg == -1) {
2909bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2910bb437946SAndreas Gruenbacher 
2911b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2912b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2913b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2914b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
2915bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2916b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
2917b411b363SPhilipp Reisner 			} else {
2918d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
2919b411b363SPhilipp Reisner 				rv = hg;
2920b411b363SPhilipp Reisner 			}
2921b411b363SPhilipp Reisner 		} else
2922b411b363SPhilipp Reisner 			rv = hg;
2923b411b363SPhilipp Reisner 	}
2924b411b363SPhilipp Reisner 
2925b411b363SPhilipp Reisner 	return rv;
2926b411b363SPhilipp Reisner }
2927b411b363SPhilipp Reisner 
2928b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
2929b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
2930b411b363SPhilipp Reisner {
2931b411b363SPhilipp Reisner 	if (!uuid) {
2932d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
2933b411b363SPhilipp Reisner 		return;
2934b411b363SPhilipp Reisner 	}
2935d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2936b411b363SPhilipp Reisner 	     text,
2937b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
2938b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
2939b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
2940b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
2941b411b363SPhilipp Reisner 	     (unsigned long long)bits,
2942b411b363SPhilipp Reisner 	     (unsigned long long)flags);
2943b411b363SPhilipp Reisner }
2944b411b363SPhilipp Reisner 
2945b411b363SPhilipp Reisner /*
2946b411b363SPhilipp Reisner   100	after split brain try auto recover
2947b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
2948b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
2949b411b363SPhilipp Reisner     0	no Sync
2950b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
2951b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
2952b411b363SPhilipp Reisner  -100	after split brain, disconnect
2953b411b363SPhilipp Reisner -1000	unrelated data
29544a23f264SPhilipp Reisner -1091   requires proto 91
29554a23f264SPhilipp Reisner -1096   requires proto 96
2956b411b363SPhilipp Reisner  */
295744a4d551SLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
2958b411b363SPhilipp Reisner {
295944a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
296044a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
2961b411b363SPhilipp Reisner 	u64 self, peer;
2962b411b363SPhilipp Reisner 	int i, j;
2963b411b363SPhilipp Reisner 
2964b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2965b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
2966b411b363SPhilipp Reisner 
2967b411b363SPhilipp Reisner 	*rule_nr = 10;
2968b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2969b411b363SPhilipp Reisner 		return 0;
2970b411b363SPhilipp Reisner 
2971b411b363SPhilipp Reisner 	*rule_nr = 20;
2972b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2973b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
2974b411b363SPhilipp Reisner 		return -2;
2975b411b363SPhilipp Reisner 
2976b411b363SPhilipp Reisner 	*rule_nr = 30;
2977b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
2978b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
2979b411b363SPhilipp Reisner 		return 2;
2980b411b363SPhilipp Reisner 
2981b411b363SPhilipp Reisner 	if (self == peer) {
2982b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
2983b411b363SPhilipp Reisner 
2984b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2985b411b363SPhilipp Reisner 
298644a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
29874a23f264SPhilipp Reisner 				return -1091;
2988b411b363SPhilipp Reisner 
2989b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2990b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2991d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
2992b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
2993b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2994b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
2995b411b363SPhilipp Reisner 
2996b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2997b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
2998b411b363SPhilipp Reisner 				*rule_nr = 34;
2999b411b363SPhilipp Reisner 			} else {
3000d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3001b411b363SPhilipp Reisner 				*rule_nr = 36;
3002b411b363SPhilipp Reisner 			}
3003b411b363SPhilipp Reisner 
3004b411b363SPhilipp Reisner 			return 1;
3005b411b363SPhilipp Reisner 		}
3006b411b363SPhilipp Reisner 
3007b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3008b411b363SPhilipp Reisner 
300944a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
30104a23f264SPhilipp Reisner 				return -1091;
3011b411b363SPhilipp Reisner 
3012b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3013b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3014d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3015b411b363SPhilipp Reisner 
3016b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3017b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3018b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3019b411b363SPhilipp Reisner 
3020b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3021b411b363SPhilipp Reisner 				*rule_nr = 35;
3022b411b363SPhilipp Reisner 			} else {
3023d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3024b411b363SPhilipp Reisner 				*rule_nr = 37;
3025b411b363SPhilipp Reisner 			}
3026b411b363SPhilipp Reisner 
3027b411b363SPhilipp Reisner 			return -1;
3028b411b363SPhilipp Reisner 		}
3029b411b363SPhilipp Reisner 
3030b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3031b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3032b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3033b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3034b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3035b411b363SPhilipp Reisner 		*rule_nr = 40;
3036b411b363SPhilipp Reisner 
3037b411b363SPhilipp Reisner 		switch (rct) {
3038b411b363SPhilipp Reisner 		case 0: /* !self_pri && !peer_pri */ return 0;
3039b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3040b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3041b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
304244a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3043b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3044b411b363SPhilipp Reisner 		}
3045b411b363SPhilipp Reisner 	}
3046b411b363SPhilipp Reisner 
3047b411b363SPhilipp Reisner 	*rule_nr = 50;
3048b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3049b411b363SPhilipp Reisner 	if (self == peer)
3050b411b363SPhilipp Reisner 		return -1;
3051b411b363SPhilipp Reisner 
3052b411b363SPhilipp Reisner 	*rule_nr = 51;
3053b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3054b411b363SPhilipp Reisner 	if (self == peer) {
305544a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3056b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3057b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3058b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3059b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3060b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3061b411b363SPhilipp Reisner 
306244a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
30634a23f264SPhilipp Reisner 				return -1091;
3064b411b363SPhilipp Reisner 
3065b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3066b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
30674a23f264SPhilipp Reisner 
3068d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3069b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
30704a23f264SPhilipp Reisner 
3071b411b363SPhilipp Reisner 			return -1;
3072b411b363SPhilipp Reisner 		}
3073b411b363SPhilipp Reisner 	}
3074b411b363SPhilipp Reisner 
3075b411b363SPhilipp Reisner 	*rule_nr = 60;
3076b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3077b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3078b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3079b411b363SPhilipp Reisner 		if (self == peer)
3080b411b363SPhilipp Reisner 			return -2;
3081b411b363SPhilipp Reisner 	}
3082b411b363SPhilipp Reisner 
3083b411b363SPhilipp Reisner 	*rule_nr = 70;
3084b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3085b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3086b411b363SPhilipp Reisner 	if (self == peer)
3087b411b363SPhilipp Reisner 		return 1;
3088b411b363SPhilipp Reisner 
3089b411b363SPhilipp Reisner 	*rule_nr = 71;
3090b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3091b411b363SPhilipp Reisner 	if (self == peer) {
309244a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3093b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3094b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3095b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3096b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3097b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3098b411b363SPhilipp Reisner 
309944a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
31004a23f264SPhilipp Reisner 				return -1091;
3101b411b363SPhilipp Reisner 
3102b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3103b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3104b411b363SPhilipp Reisner 
3105d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3106b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3107b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3108b411b363SPhilipp Reisner 
3109b411b363SPhilipp Reisner 			return 1;
3110b411b363SPhilipp Reisner 		}
3111b411b363SPhilipp Reisner 	}
3112b411b363SPhilipp Reisner 
3113b411b363SPhilipp Reisner 
3114b411b363SPhilipp Reisner 	*rule_nr = 80;
3115b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3116b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3117b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3118b411b363SPhilipp Reisner 		if (self == peer)
3119b411b363SPhilipp Reisner 			return 2;
3120b411b363SPhilipp Reisner 	}
3121b411b363SPhilipp Reisner 
3122b411b363SPhilipp Reisner 	*rule_nr = 90;
3123b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3124b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3125b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3126b411b363SPhilipp Reisner 		return 100;
3127b411b363SPhilipp Reisner 
3128b411b363SPhilipp Reisner 	*rule_nr = 100;
3129b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3130b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3131b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3132b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3133b411b363SPhilipp Reisner 			if (self == peer)
3134b411b363SPhilipp Reisner 				return -100;
3135b411b363SPhilipp Reisner 		}
3136b411b363SPhilipp Reisner 	}
3137b411b363SPhilipp Reisner 
3138b411b363SPhilipp Reisner 	return -1000;
3139b411b363SPhilipp Reisner }
3140b411b363SPhilipp Reisner 
3141b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3142b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3143b411b363SPhilipp Reisner  */
314469a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
314569a22773SAndreas Gruenbacher 					   enum drbd_role peer_role,
3146b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
3147b411b363SPhilipp Reisner {
314869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3149b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
3150b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
315144ed167dSPhilipp Reisner 	struct net_conf *nc;
31526dff2902SAndreas Gruenbacher 	int hg, rule_nr, rr_conflict, tentative;
3153b411b363SPhilipp Reisner 
3154b30ab791SAndreas Gruenbacher 	mydisk = device->state.disk;
3155b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
3156b30ab791SAndreas Gruenbacher 		mydisk = device->new_state_tmp.disk;
3157b411b363SPhilipp Reisner 
3158d0180171SAndreas Gruenbacher 	drbd_info(device, "drbd_sync_handshake:\n");
31599f2247bbSPhilipp Reisner 
3160b30ab791SAndreas Gruenbacher 	spin_lock_irq(&device->ldev->md.uuid_lock);
3161b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3162b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "peer", device->p_uuid,
3163b30ab791SAndreas Gruenbacher 		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3164b411b363SPhilipp Reisner 
3165b30ab791SAndreas Gruenbacher 	hg = drbd_uuid_compare(device, &rule_nr);
3166b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->ldev->md.uuid_lock);
3167b411b363SPhilipp Reisner 
3168d0180171SAndreas Gruenbacher 	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3169b411b363SPhilipp Reisner 
3170b411b363SPhilipp Reisner 	if (hg == -1000) {
3171d0180171SAndreas Gruenbacher 		drbd_alert(device, "Unrelated data, aborting!\n");
3172b411b363SPhilipp Reisner 		return C_MASK;
3173b411b363SPhilipp Reisner 	}
31744a23f264SPhilipp Reisner 	if (hg < -1000) {
3175d0180171SAndreas Gruenbacher 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3176b411b363SPhilipp Reisner 		return C_MASK;
3177b411b363SPhilipp Reisner 	}
3178b411b363SPhilipp Reisner 
3179b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3180b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
3181b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
3182b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
3183b411b363SPhilipp Reisner 		if (f)
3184b411b363SPhilipp Reisner 			hg = hg*2;
3185d0180171SAndreas Gruenbacher 		drbd_info(device, "Becoming sync %s due to disk states.\n",
3186b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
3187b411b363SPhilipp Reisner 	}
3188b411b363SPhilipp Reisner 
31893a11a487SAdam Gandelman 	if (abs(hg) == 100)
3190b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "initial-split-brain");
31913a11a487SAdam Gandelman 
319244ed167dSPhilipp Reisner 	rcu_read_lock();
319369a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
319444ed167dSPhilipp Reisner 
319544ed167dSPhilipp Reisner 	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
3196b30ab791SAndreas Gruenbacher 		int pcount = (device->state.role == R_PRIMARY)
3197b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
3198b411b363SPhilipp Reisner 		int forced = (hg == -100);
3199b411b363SPhilipp Reisner 
3200b411b363SPhilipp Reisner 		switch (pcount) {
3201b411b363SPhilipp Reisner 		case 0:
320269a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_0p(peer_device);
3203b411b363SPhilipp Reisner 			break;
3204b411b363SPhilipp Reisner 		case 1:
320569a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_1p(peer_device);
3206b411b363SPhilipp Reisner 			break;
3207b411b363SPhilipp Reisner 		case 2:
320869a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_2p(peer_device);
3209b411b363SPhilipp Reisner 			break;
3210b411b363SPhilipp Reisner 		}
3211b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
3212d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, %d primaries, "
3213b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3214b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3215b411b363SPhilipp Reisner 			if (forced) {
3216d0180171SAndreas Gruenbacher 				drbd_warn(device, "Doing a full sync, since"
3217b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3218b411b363SPhilipp Reisner 				hg = hg*2;
3219b411b363SPhilipp Reisner 			}
3220b411b363SPhilipp Reisner 		}
3221b411b363SPhilipp Reisner 	}
3222b411b363SPhilipp Reisner 
3223b411b363SPhilipp Reisner 	if (hg == -100) {
3224b30ab791SAndreas Gruenbacher 		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3225b411b363SPhilipp Reisner 			hg = -1;
3226b30ab791SAndreas Gruenbacher 		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3227b411b363SPhilipp Reisner 			hg = 1;
3228b411b363SPhilipp Reisner 
3229b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3230d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, manually solved. "
3231b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3232b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3233b411b363SPhilipp Reisner 	}
323444ed167dSPhilipp Reisner 	rr_conflict = nc->rr_conflict;
32356dff2902SAndreas Gruenbacher 	tentative = nc->tentative;
323644ed167dSPhilipp Reisner 	rcu_read_unlock();
3237b411b363SPhilipp Reisner 
3238b411b363SPhilipp Reisner 	if (hg == -100) {
3239580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3240580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3241580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3242580b9767SLars Ellenberg 		 * to that disk, in a way... */
3243d0180171SAndreas Gruenbacher 		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3244b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "split-brain");
3245b411b363SPhilipp Reisner 		return C_MASK;
3246b411b363SPhilipp Reisner 	}
3247b411b363SPhilipp Reisner 
3248b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3249d0180171SAndreas Gruenbacher 		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3250b411b363SPhilipp Reisner 		return C_MASK;
3251b411b363SPhilipp Reisner 	}
3252b411b363SPhilipp Reisner 
3253b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3254b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
325544ed167dSPhilipp Reisner 		switch (rr_conflict) {
3256b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3257b30ab791SAndreas Gruenbacher 			drbd_khelper(device, "pri-lost");
3258b411b363SPhilipp Reisner 			/* fall through */
3259b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3260d0180171SAndreas Gruenbacher 			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3261b411b363SPhilipp Reisner 			return C_MASK;
3262b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
3263d0180171SAndreas Gruenbacher 			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3264b411b363SPhilipp Reisner 			     "assumption\n");
3265b411b363SPhilipp Reisner 		}
3266b411b363SPhilipp Reisner 	}
3267b411b363SPhilipp Reisner 
326869a22773SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3269cf14c2e9SPhilipp Reisner 		if (hg == 0)
3270d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3271cf14c2e9SPhilipp Reisner 		else
3272d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3273cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3274cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3275cf14c2e9SPhilipp Reisner 		return C_MASK;
3276cf14c2e9SPhilipp Reisner 	}
3277cf14c2e9SPhilipp Reisner 
3278b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3279d0180171SAndreas Gruenbacher 		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3280b30ab791SAndreas Gruenbacher 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
328120ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3282b411b363SPhilipp Reisner 			return C_MASK;
3283b411b363SPhilipp Reisner 	}
3284b411b363SPhilipp Reisner 
3285b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3286b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3287b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3288b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3289b411b363SPhilipp Reisner 	} else {
3290b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3291b30ab791SAndreas Gruenbacher 		if (drbd_bm_total_weight(device)) {
3292d0180171SAndreas Gruenbacher 			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3293b30ab791SAndreas Gruenbacher 			     drbd_bm_total_weight(device));
3294b411b363SPhilipp Reisner 		}
3295b411b363SPhilipp Reisner 	}
3296b411b363SPhilipp Reisner 
3297b411b363SPhilipp Reisner 	return rv;
3298b411b363SPhilipp Reisner }
3299b411b363SPhilipp Reisner 
3300f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3301b411b363SPhilipp Reisner {
3302b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3303f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3304f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3305b411b363SPhilipp Reisner 
3306b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3307f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3308f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3309b411b363SPhilipp Reisner 
3310b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3311f179d76dSPhilipp Reisner 	return peer;
3312b411b363SPhilipp Reisner }
3313b411b363SPhilipp Reisner 
/*
 * receive_protocol() - handle the peer's P_PROTOCOL / P_PROTOCOL_UPDATE packet
 *
 * Decodes the connection-wide settings announced by the peer (wire
 * protocol, after-split-brain policies, two-primaries, discard-my-data,
 * and -- for protocol versions >= 87 -- the data integrity algorithm
 * name carried as payload).  For an initial connect (any command other
 * than P_PROTOCOL_UPDATE) each setting must match our own net_conf;
 * any mismatch drops the connection.  On success a copy of net_conf
 * with the peer's values is published via RCU, together with the
 * peer's integrity hash transform and its digest buffers.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* All fields arrive in network byte order. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Protocol 87 and later append the data-integrity-alg name as
	 * variable-size payload; reject anything larger than our buffer
	 * and force NUL termination of whatever we received. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* Initial connect: every connection-wide setting has to agree
	 * with our configuration.  A P_PROTOCOL_UPDATE skips these
	 * checks and just applies the new values below. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* Compare against our current net_conf under RCU; every
		 * mismatch exits through disconnect_rcu_unlock. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* after-sb policies are compared from our point of view,
		 * hence the convert_after_sb() on the peer's values. */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* Both sides asking to discard their own data makes no sense. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Two digest-sized scratch buffers: one for the digest
		 * received from the peer, one for our verification. */
		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Swap in a modified copy of net_conf while holding both the
	 * connection's data mutex and the resource-wide conf_update
	 * mutex; readers continue to use the old copy under RCU. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Replace the previous integrity transform and digest buffers;
	 * ownership of the newly allocated ones passes to connection. */
	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for all RCU readers of the old net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* Undo any allocations made in this function; crypto_free_hash()
	 * and kfree() both tolerate NULL. */
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3463b411b363SPhilipp Reisner 
3464b411b363SPhilipp Reisner /* helper function
3465b411b363SPhilipp Reisner  * input: alg name, feature name
3466b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3467b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3468b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
34698ce953aaSLars Ellenberg static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
3470b411b363SPhilipp Reisner 		const char *alg, const char *name)
3471b411b363SPhilipp Reisner {
3472b411b363SPhilipp Reisner 	struct crypto_hash *tfm;
3473b411b363SPhilipp Reisner 
3474b411b363SPhilipp Reisner 	if (!alg[0])
3475b411b363SPhilipp Reisner 		return NULL;
3476b411b363SPhilipp Reisner 
3477b411b363SPhilipp Reisner 	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3478b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3479d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3480b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3481b411b363SPhilipp Reisner 		return tfm;
3482b411b363SPhilipp Reisner 	}
3483b411b363SPhilipp Reisner 	return tfm;
3484b411b363SPhilipp Reisner }
3485b411b363SPhilipp Reisner 
3486bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3487b411b363SPhilipp Reisner {
3488bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
34894a76b161SAndreas Gruenbacher 	int size = pi->size;
34904a76b161SAndreas Gruenbacher 
34914a76b161SAndreas Gruenbacher 	while (size) {
34924a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3493bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
34944a76b161SAndreas Gruenbacher 		if (s <= 0) {
34954a76b161SAndreas Gruenbacher 			if (s < 0)
34964a76b161SAndreas Gruenbacher 				return s;
34974a76b161SAndreas Gruenbacher 			break;
34984a76b161SAndreas Gruenbacher 		}
34994a76b161SAndreas Gruenbacher 		size -= s;
35004a76b161SAndreas Gruenbacher 	}
35014a76b161SAndreas Gruenbacher 	if (size)
35024a76b161SAndreas Gruenbacher 		return -EIO;
35034a76b161SAndreas Gruenbacher 	return 0;
35044a76b161SAndreas Gruenbacher }
35054a76b161SAndreas Gruenbacher 
35064a76b161SAndreas Gruenbacher /*
35074a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
35084a76b161SAndreas Gruenbacher  *
35094a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
35104a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
35114a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
35124a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
35134a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
35144a76b161SAndreas Gruenbacher  *
35154a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
35164a76b161SAndreas Gruenbacher  */
3517bde89a9eSAndreas Gruenbacher static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
35184a76b161SAndreas Gruenbacher {
35191ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
35202fcb8f30SAndreas Gruenbacher 		  cmdname(pi->cmd), pi->vnr);
3521bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
35224a76b161SAndreas Gruenbacher }
35234a76b161SAndreas Gruenbacher 
3524bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
35254a76b161SAndreas Gruenbacher {
35269f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3527b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3528e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3529b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
3530b411b363SPhilipp Reisner 	struct crypto_hash *verify_tfm = NULL;
3531b411b363SPhilipp Reisner 	struct crypto_hash *csums_tfm = NULL;
35322ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3533813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3534bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3535813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
3536778f271dSPhilipp Reisner 	int fifo_size = 0;
353782bc0194SAndreas Gruenbacher 	int err;
3538b411b363SPhilipp Reisner 
35399f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
35409f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3541bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
35429f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3543b411b363SPhilipp Reisner 
3544b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3545b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3546b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
35478e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
35488e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3549b411b363SPhilipp Reisner 
3550e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3551d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3552e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
355382bc0194SAndreas Gruenbacher 		return -EIO;
3554b411b363SPhilipp Reisner 	}
3555b411b363SPhilipp Reisner 
3556b411b363SPhilipp Reisner 	if (apv <= 88) {
3557e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3558e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35598e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3560e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3561e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35620b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
35638e26f9ccSPhilipp Reisner 	} else {
3564e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3565e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35660b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3567b411b363SPhilipp Reisner 	}
3568b411b363SPhilipp Reisner 
3569b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3570e658983aSAndreas Gruenbacher 	p = pi->data;
3571b411b363SPhilipp Reisner 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3572b411b363SPhilipp Reisner 
35739f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
357482bc0194SAndreas Gruenbacher 	if (err)
357582bc0194SAndreas Gruenbacher 		return err;
3576b411b363SPhilipp Reisner 
35770500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
35789f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3579b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3580daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3581daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3582b30ab791SAndreas Gruenbacher 			put_ldev(device);
35830500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3584d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3585daeda1ccSPhilipp Reisner 			return -ENOMEM;
3586f399002eSLars Ellenberg 		}
3587b411b363SPhilipp Reisner 
3588b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3589daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3590daeda1ccSPhilipp Reisner 
35916394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3592813472ceSPhilipp Reisner 	}
3593b411b363SPhilipp Reisner 
3594b411b363SPhilipp Reisner 	if (apv >= 88) {
3595b411b363SPhilipp Reisner 		if (apv == 88) {
35965de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3597d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
35985de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3599b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3600813472ceSPhilipp Reisner 				err = -EIO;
3601813472ceSPhilipp Reisner 				goto reconnect;
3602b411b363SPhilipp Reisner 			}
3603b411b363SPhilipp Reisner 
36049f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3605813472ceSPhilipp Reisner 			if (err)
3606813472ceSPhilipp Reisner 				goto reconnect;
3607b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3608b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
36090b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3610b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3611b411b363SPhilipp Reisner 
3612b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3613b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3614b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
36150b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
36160b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3617b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3618b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3619b411b363SPhilipp Reisner 		}
3620b411b363SPhilipp Reisner 
36212ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3622b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3623d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
36242ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3625b411b363SPhilipp Reisner 				goto disconnect;
3626b411b363SPhilipp Reisner 			}
3627b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3628b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3629b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3630b411b363SPhilipp Reisner 				verify_tfm = NULL;
3631b411b363SPhilipp Reisner 				goto disconnect;
3632b411b363SPhilipp Reisner 			}
3633b411b363SPhilipp Reisner 		}
3634b411b363SPhilipp Reisner 
36352ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3636b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3637d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
36382ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3639b411b363SPhilipp Reisner 				goto disconnect;
3640b411b363SPhilipp Reisner 			}
3641b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3642b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3643b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3644b411b363SPhilipp Reisner 				csums_tfm = NULL;
3645b411b363SPhilipp Reisner 				goto disconnect;
3646b411b363SPhilipp Reisner 			}
3647b411b363SPhilipp Reisner 		}
3648b411b363SPhilipp Reisner 
3649813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3650daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3651daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3652daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3653daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3654778f271dSPhilipp Reisner 
3655daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3656b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3657813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3658813472ceSPhilipp Reisner 				if (!new_plan) {
3659d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3660b30ab791SAndreas Gruenbacher 					put_ldev(device);
3661778f271dSPhilipp Reisner 					goto disconnect;
3662778f271dSPhilipp Reisner 				}
3663778f271dSPhilipp Reisner 			}
36648e26f9ccSPhilipp Reisner 		}
3665b411b363SPhilipp Reisner 
366691fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
36672ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
36682ec91e0eSPhilipp Reisner 			if (!new_net_conf) {
3669d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new net_conf failed\n");
367091fd4dadSPhilipp Reisner 				goto disconnect;
367191fd4dadSPhilipp Reisner 			}
367291fd4dadSPhilipp Reisner 
36732ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
367491fd4dadSPhilipp Reisner 
3675b411b363SPhilipp Reisner 			if (verify_tfm) {
36762ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
36772ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
36789f4fe9adSAndreas Gruenbacher 				crypto_free_hash(peer_device->connection->verify_tfm);
36799f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
3680d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3681b411b363SPhilipp Reisner 			}
3682b411b363SPhilipp Reisner 			if (csums_tfm) {
36832ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
36842ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
36859f4fe9adSAndreas Gruenbacher 				crypto_free_hash(peer_device->connection->csums_tfm);
36869f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
3687d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3688b411b363SPhilipp Reisner 			}
3689bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
3690778f271dSPhilipp Reisner 		}
3691b411b363SPhilipp Reisner 	}
3692b411b363SPhilipp Reisner 
3693813472ceSPhilipp Reisner 	if (new_disk_conf) {
3694b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3695b30ab791SAndreas Gruenbacher 		put_ldev(device);
3696b411b363SPhilipp Reisner 	}
3697813472ceSPhilipp Reisner 
3698813472ceSPhilipp Reisner 	if (new_plan) {
3699b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
3700b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
3701813472ceSPhilipp Reisner 	}
3702daeda1ccSPhilipp Reisner 
37030500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3704daeda1ccSPhilipp Reisner 	synchronize_rcu();
3705daeda1ccSPhilipp Reisner 	if (new_net_conf)
3706daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
3707daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
3708813472ceSPhilipp Reisner 	kfree(old_plan);
3709daeda1ccSPhilipp Reisner 
371082bc0194SAndreas Gruenbacher 	return 0;
3711b411b363SPhilipp Reisner 
3712813472ceSPhilipp Reisner reconnect:
3713813472ceSPhilipp Reisner 	if (new_disk_conf) {
3714b30ab791SAndreas Gruenbacher 		put_ldev(device);
3715813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3716813472ceSPhilipp Reisner 	}
37170500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3718813472ceSPhilipp Reisner 	return -EIO;
3719813472ceSPhilipp Reisner 
3720b411b363SPhilipp Reisner disconnect:
3721813472ceSPhilipp Reisner 	kfree(new_plan);
3722813472ceSPhilipp Reisner 	if (new_disk_conf) {
3723b30ab791SAndreas Gruenbacher 		put_ldev(device);
3724813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3725813472ceSPhilipp Reisner 	}
37260500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3727b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
3728b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
3729b411b363SPhilipp Reisner 	crypto_free_hash(csums_tfm);
3730b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
3731b411b363SPhilipp Reisner 	crypto_free_hash(verify_tfm);
37329f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
373382bc0194SAndreas Gruenbacher 	return -EIO;
3734b411b363SPhilipp Reisner }
3735b411b363SPhilipp Reisner 
3736b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
3737b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
3738b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
3739b411b363SPhilipp Reisner {
3740b411b363SPhilipp Reisner 	sector_t d;
3741b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
3742b411b363SPhilipp Reisner 		return;
3743b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
3744b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
3745d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
3746b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
3747b411b363SPhilipp Reisner }
3748b411b363SPhilipp Reisner 
3749bde89a9eSAndreas Gruenbacher static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
3750b411b363SPhilipp Reisner {
37519f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3752b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3753e658983aSAndreas Gruenbacher 	struct p_sizes *p = pi->data;
3754e96c9633SPhilipp Reisner 	enum determine_dev_size dd = DS_UNCHANGED;
37556a8d68b1SLars Ellenberg 	sector_t p_size, p_usize, p_csize, my_usize;
3756b411b363SPhilipp Reisner 	int ldsc = 0; /* local disk size changed */
3757e89b591cSPhilipp Reisner 	enum dds_flags ddsf;
3758b411b363SPhilipp Reisner 
37599f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
37609f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3761bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
37629f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
37634a76b161SAndreas Gruenbacher 
3764b411b363SPhilipp Reisner 	p_size = be64_to_cpu(p->d_size);
3765b411b363SPhilipp Reisner 	p_usize = be64_to_cpu(p->u_size);
37666a8d68b1SLars Ellenberg 	p_csize = be64_to_cpu(p->c_size);
3767b411b363SPhilipp Reisner 
3768b411b363SPhilipp Reisner 	/* just store the peer's disk size for now.
3769b411b363SPhilipp Reisner 	 * we still need to figure out whether we accept that. */
3770b30ab791SAndreas Gruenbacher 	device->p_size = p_size;
3771b411b363SPhilipp Reisner 
3772b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3773daeda1ccSPhilipp Reisner 		rcu_read_lock();
3774b30ab791SAndreas Gruenbacher 		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
3775daeda1ccSPhilipp Reisner 		rcu_read_unlock();
3776daeda1ccSPhilipp Reisner 
3777b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "lower level device sizes",
3778b30ab791SAndreas Gruenbacher 			   p_size, drbd_get_max_capacity(device->ldev));
3779b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "user requested size",
3780daeda1ccSPhilipp Reisner 					    p_usize, my_usize);
3781b411b363SPhilipp Reisner 
3782b411b363SPhilipp Reisner 		/* if this is the first connect, or an otherwise expected
3783b411b363SPhilipp Reisner 		 * param exchange, choose the minimum */
3784b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_WF_REPORT_PARAMS)
3785daeda1ccSPhilipp Reisner 			p_usize = min_not_zero(my_usize, p_usize);
3786b411b363SPhilipp Reisner 
3787b411b363SPhilipp Reisner 		/* Never shrink a device with usable data during connect.
3788b411b363SPhilipp Reisner 		   But allow online shrinking if we are connected. */
3789b30ab791SAndreas Gruenbacher 		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3790b30ab791SAndreas Gruenbacher 		    drbd_get_capacity(device->this_bdev) &&
3791b30ab791SAndreas Gruenbacher 		    device->state.disk >= D_OUTDATED &&
3792b30ab791SAndreas Gruenbacher 		    device->state.conn < C_CONNECTED) {
3793d0180171SAndreas Gruenbacher 			drbd_err(device, "The peer's disk size is too small!\n");
37949f4fe9adSAndreas Gruenbacher 			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
3795b30ab791SAndreas Gruenbacher 			put_ldev(device);
379682bc0194SAndreas Gruenbacher 			return -EIO;
3797b411b363SPhilipp Reisner 		}
3798daeda1ccSPhilipp Reisner 
3799daeda1ccSPhilipp Reisner 		if (my_usize != p_usize) {
3800daeda1ccSPhilipp Reisner 			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3801daeda1ccSPhilipp Reisner 
3802daeda1ccSPhilipp Reisner 			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3803daeda1ccSPhilipp Reisner 			if (!new_disk_conf) {
3804d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new disk_conf failed\n");
3805b30ab791SAndreas Gruenbacher 				put_ldev(device);
3806daeda1ccSPhilipp Reisner 				return -ENOMEM;
3807daeda1ccSPhilipp Reisner 			}
3808daeda1ccSPhilipp Reisner 
38090500813fSAndreas Gruenbacher 			mutex_lock(&connection->resource->conf_update);
3810b30ab791SAndreas Gruenbacher 			old_disk_conf = device->ldev->disk_conf;
3811daeda1ccSPhilipp Reisner 			*new_disk_conf = *old_disk_conf;
3812daeda1ccSPhilipp Reisner 			new_disk_conf->disk_size = p_usize;
3813daeda1ccSPhilipp Reisner 
3814b30ab791SAndreas Gruenbacher 			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
38150500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3816daeda1ccSPhilipp Reisner 			synchronize_rcu();
3817daeda1ccSPhilipp Reisner 			kfree(old_disk_conf);
3818daeda1ccSPhilipp Reisner 
3819d0180171SAndreas Gruenbacher 			drbd_info(device, "Peer sets u_size to %lu sectors\n",
3820daeda1ccSPhilipp Reisner 				 (unsigned long)my_usize);
3821daeda1ccSPhilipp Reisner 		}
3822daeda1ccSPhilipp Reisner 
3823b30ab791SAndreas Gruenbacher 		put_ldev(device);
3824b411b363SPhilipp Reisner 	}
3825b411b363SPhilipp Reisner 
382620c68fdeSLars Ellenberg 	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
382720c68fdeSLars Ellenberg 	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
382820c68fdeSLars Ellenberg 	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
382920c68fdeSLars Ellenberg 	   drbd_reconsider_max_bio_size(), we can be sure that after
383020c68fdeSLars Ellenberg 	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
383120c68fdeSLars Ellenberg 
3832e89b591cSPhilipp Reisner 	ddsf = be16_to_cpu(p->dds_flags);
3833b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
38348fe39aacSPhilipp Reisner 		drbd_reconsider_max_bio_size(device, device->ldev);
3835b30ab791SAndreas Gruenbacher 		dd = drbd_determine_dev_size(device, ddsf, NULL);
3836b30ab791SAndreas Gruenbacher 		put_ldev(device);
3837e96c9633SPhilipp Reisner 		if (dd == DS_ERROR)
383882bc0194SAndreas Gruenbacher 			return -EIO;
3839b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
3840b411b363SPhilipp Reisner 	} else {
38416a8d68b1SLars Ellenberg 		/*
38426a8d68b1SLars Ellenberg 		 * I am diskless, need to accept the peer's *current* size.
38436a8d68b1SLars Ellenberg 		 * I must NOT accept the peers backing disk size,
38446a8d68b1SLars Ellenberg 		 * it may have been larger than mine all along...
38456a8d68b1SLars Ellenberg 		 *
38466a8d68b1SLars Ellenberg 		 * At this point, the peer knows more about my disk, or at
38476a8d68b1SLars Ellenberg 		 * least about what we last agreed upon, than myself.
38486a8d68b1SLars Ellenberg 		 * So if his c_size is less than his d_size, the most likely
38496a8d68b1SLars Ellenberg 		 * reason is that *my* d_size was smaller last time we checked.
38506a8d68b1SLars Ellenberg 		 *
38516a8d68b1SLars Ellenberg 		 * However, if he sends a zero current size,
38526a8d68b1SLars Ellenberg 		 * take his (user-capped or) backing disk size anyways.
38536a8d68b1SLars Ellenberg 		 */
38548fe39aacSPhilipp Reisner 		drbd_reconsider_max_bio_size(device, NULL);
38556a8d68b1SLars Ellenberg 		drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
3856b411b363SPhilipp Reisner 	}
3857b411b363SPhilipp Reisner 
3858b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3859b30ab791SAndreas Gruenbacher 		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3860b30ab791SAndreas Gruenbacher 			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
3861b411b363SPhilipp Reisner 			ldsc = 1;
3862b411b363SPhilipp Reisner 		}
3863b411b363SPhilipp Reisner 
3864b30ab791SAndreas Gruenbacher 		put_ldev(device);
3865b411b363SPhilipp Reisner 	}
3866b411b363SPhilipp Reisner 
3867b30ab791SAndreas Gruenbacher 	if (device->state.conn > C_WF_REPORT_PARAMS) {
3868b411b363SPhilipp Reisner 		if (be64_to_cpu(p->c_size) !=
3869b30ab791SAndreas Gruenbacher 		    drbd_get_capacity(device->this_bdev) || ldsc) {
3870b411b363SPhilipp Reisner 			/* we have different sizes, probably peer
3871b411b363SPhilipp Reisner 			 * needs to know my new size... */
387269a22773SAndreas Gruenbacher 			drbd_send_sizes(peer_device, 0, ddsf);
3873b411b363SPhilipp Reisner 		}
3874b30ab791SAndreas Gruenbacher 		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3875b30ab791SAndreas Gruenbacher 		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3876b30ab791SAndreas Gruenbacher 			if (device->state.pdsk >= D_INCONSISTENT &&
3877b30ab791SAndreas Gruenbacher 			    device->state.disk >= D_INCONSISTENT) {
3878e89b591cSPhilipp Reisner 				if (ddsf & DDSF_NO_RESYNC)
3879d0180171SAndreas Gruenbacher 					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
3880b411b363SPhilipp Reisner 				else
3881b30ab791SAndreas Gruenbacher 					resync_after_online_grow(device);
3882e89b591cSPhilipp Reisner 			} else
3883b30ab791SAndreas Gruenbacher 				set_bit(RESYNC_AFTER_NEG, &device->flags);
3884b411b363SPhilipp Reisner 		}
3885b411b363SPhilipp Reisner 	}
3886b411b363SPhilipp Reisner 
388782bc0194SAndreas Gruenbacher 	return 0;
3888b411b363SPhilipp Reisner }
3889b411b363SPhilipp Reisner 
3890bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
3891b411b363SPhilipp Reisner {
38929f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3893b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3894e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
3895b411b363SPhilipp Reisner 	u64 *p_uuid;
389662b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
3897b411b363SPhilipp Reisner 
38989f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38999f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3900bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
39019f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
39024a76b161SAndreas Gruenbacher 
3903b411b363SPhilipp Reisner 	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3904063eacf8SJing Wang 	if (!p_uuid) {
3905d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc of p_uuid failed\n");
3906063eacf8SJing Wang 		return false;
3907063eacf8SJing Wang 	}
3908b411b363SPhilipp Reisner 
3909b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3910b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
3911b411b363SPhilipp Reisner 
3912b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
3913b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
3914b411b363SPhilipp Reisner 
3915b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED &&
3916b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
3917b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
3918b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3919d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
3920b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
39219f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
392282bc0194SAndreas Gruenbacher 		return -EIO;
3923b411b363SPhilipp Reisner 	}
3924b411b363SPhilipp Reisner 
3925b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3926b411b363SPhilipp Reisner 		int skip_initial_sync =
3927b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
39289f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
3929b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3930b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
3931b411b363SPhilipp Reisner 		if (skip_initial_sync) {
3932d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
3933b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
393420ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
393520ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
3936b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3937b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
3938b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3939b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
3940b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
394162b0da3aSLars Ellenberg 			updated_uuids = 1;
3942b411b363SPhilipp Reisner 		}
3943b30ab791SAndreas Gruenbacher 		put_ldev(device);
3944b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
3945b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
394618a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
394718a50fa2SPhilipp Reisner 		   for me. */
3948b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
3949b411b363SPhilipp Reisner 	}
3950b411b363SPhilipp Reisner 
3951b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
3952b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
3953b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
3954b411b363SPhilipp Reisner 	   new disk state... */
3955b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
3956b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
3957b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3958b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
395962b0da3aSLars Ellenberg 
396062b0da3aSLars Ellenberg 	if (updated_uuids)
3961b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
3962b411b363SPhilipp Reisner 
396382bc0194SAndreas Gruenbacher 	return 0;
3964b411b363SPhilipp Reisner }
3965b411b363SPhilipp Reisner 
3966b411b363SPhilipp Reisner /**
3967b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
3968b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
3969b411b363SPhilipp Reisner  */
3970b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
3971b411b363SPhilipp Reisner {
3972b411b363SPhilipp Reisner 	union drbd_state ms;
3973b411b363SPhilipp Reisner 
3974b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
3975369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
3976b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
3977b411b363SPhilipp Reisner 
3978b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3979b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3980b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3981b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
3982b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
3983b411b363SPhilipp Reisner 	};
3984b411b363SPhilipp Reisner 
3985b411b363SPhilipp Reisner 	ms.i = ps.i;
3986b411b363SPhilipp Reisner 
3987b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
3988b411b363SPhilipp Reisner 	ms.peer = ps.role;
3989b411b363SPhilipp Reisner 	ms.role = ps.peer;
3990b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
3991b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
3992b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3993b411b363SPhilipp Reisner 
3994b411b363SPhilipp Reisner 	return ms;
3995b411b363SPhilipp Reisner }
3996b411b363SPhilipp Reisner 
3997bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
3998b411b363SPhilipp Reisner {
39999f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4000b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4001e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4002b411b363SPhilipp Reisner 	union drbd_state mask, val;
4003bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4004b411b363SPhilipp Reisner 
40059f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
40069f4fe9adSAndreas Gruenbacher 	if (!peer_device)
40074a76b161SAndreas Gruenbacher 		return -EIO;
40089f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
40094a76b161SAndreas Gruenbacher 
4010b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4011b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4012b411b363SPhilipp Reisner 
40139f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4014b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
401569a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
401682bc0194SAndreas Gruenbacher 		return 0;
4017b411b363SPhilipp Reisner 	}
4018b411b363SPhilipp Reisner 
4019b411b363SPhilipp Reisner 	mask = convert_state(mask);
4020b411b363SPhilipp Reisner 	val = convert_state(val);
4021b411b363SPhilipp Reisner 
4022b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
402369a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4024047cd4a6SPhilipp Reisner 
4025b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4026b411b363SPhilipp Reisner 
402782bc0194SAndreas Gruenbacher 	return 0;
4028b411b363SPhilipp Reisner }
4029b411b363SPhilipp Reisner 
4030bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4031b411b363SPhilipp Reisner {
4032e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4033dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4034dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4035dfafcc8aSPhilipp Reisner 
4036dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4037dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4038dfafcc8aSPhilipp Reisner 
4039bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4040bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4041bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
404282bc0194SAndreas Gruenbacher 		return 0;
4043dfafcc8aSPhilipp Reisner 	}
4044dfafcc8aSPhilipp Reisner 
4045dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4046dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4047dfafcc8aSPhilipp Reisner 
4048bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4049bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4050dfafcc8aSPhilipp Reisner 
405182bc0194SAndreas Gruenbacher 	return 0;
4052dfafcc8aSPhilipp Reisner }
4053dfafcc8aSPhilipp Reisner 
4054bde89a9eSAndreas Gruenbacher static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
4055b411b363SPhilipp Reisner {
40569f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4057b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4058e658983aSAndreas Gruenbacher 	struct p_state *p = pi->data;
40594ac4aadaSLars Ellenberg 	union drbd_state os, ns, peer_state;
4060b411b363SPhilipp Reisner 	enum drbd_disk_state real_peer_disk;
406165d922c3SPhilipp Reisner 	enum chg_state_flags cs_flags;
4062b411b363SPhilipp Reisner 	int rv;
4063b411b363SPhilipp Reisner 
40649f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
40659f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4066bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
40679f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
40684a76b161SAndreas Gruenbacher 
4069b411b363SPhilipp Reisner 	peer_state.i = be32_to_cpu(p->state);
4070b411b363SPhilipp Reisner 
4071b411b363SPhilipp Reisner 	real_peer_disk = peer_state.disk;
4072b411b363SPhilipp Reisner 	if (peer_state.disk == D_NEGOTIATING) {
4073b30ab791SAndreas Gruenbacher 		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
4074d0180171SAndreas Gruenbacher 		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
4075b411b363SPhilipp Reisner 	}
4076b411b363SPhilipp Reisner 
40770500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
4078b411b363SPhilipp Reisner  retry:
4079b30ab791SAndreas Gruenbacher 	os = ns = drbd_read_state(device);
40800500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
4081b411b363SPhilipp Reisner 
4082668700b4SPhilipp Reisner 	/* If some other part of the code (ack_receiver thread, timeout)
4083545752d5SLars Ellenberg 	 * already decided to close the connection again,
4084545752d5SLars Ellenberg 	 * we must not "re-establish" it here. */
4085545752d5SLars Ellenberg 	if (os.conn <= C_TEAR_DOWN)
408658ffa580SLars Ellenberg 		return -ECONNRESET;
4087545752d5SLars Ellenberg 
408840424e4aSLars Ellenberg 	/* If this is the "end of sync" confirmation, usually the peer disk
408940424e4aSLars Ellenberg 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
409040424e4aSLars Ellenberg 	 * set) resync started in PausedSyncT, or if the timing of pause-/
409140424e4aSLars Ellenberg 	 * unpause-sync events has been "just right", the peer disk may
409240424e4aSLars Ellenberg 	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
409340424e4aSLars Ellenberg 	 */
409440424e4aSLars Ellenberg 	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
409540424e4aSLars Ellenberg 	    real_peer_disk == D_UP_TO_DATE &&
4096e9ef7bb6SLars Ellenberg 	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4097e9ef7bb6SLars Ellenberg 		/* If we are (becoming) SyncSource, but peer is still in sync
4098e9ef7bb6SLars Ellenberg 		 * preparation, ignore its uptodate-ness to avoid flapping, it
4099e9ef7bb6SLars Ellenberg 		 * will change to inconsistent once the peer reaches active
4100e9ef7bb6SLars Ellenberg 		 * syncing states.
4101e9ef7bb6SLars Ellenberg 		 * It may have changed syncer-paused flags, however, so we
4102e9ef7bb6SLars Ellenberg 		 * cannot ignore this completely. */
4103e9ef7bb6SLars Ellenberg 		if (peer_state.conn > C_CONNECTED &&
4104e9ef7bb6SLars Ellenberg 		    peer_state.conn < C_SYNC_SOURCE)
4105e9ef7bb6SLars Ellenberg 			real_peer_disk = D_INCONSISTENT;
4106e9ef7bb6SLars Ellenberg 
4107e9ef7bb6SLars Ellenberg 		/* if peer_state changes to connected at the same time,
4108e9ef7bb6SLars Ellenberg 		 * it explicitly notifies us that it finished resync.
4109e9ef7bb6SLars Ellenberg 		 * Maybe we should finish it up, too? */
4110e9ef7bb6SLars Ellenberg 		else if (os.conn >= C_SYNC_SOURCE &&
4111e9ef7bb6SLars Ellenberg 			 peer_state.conn == C_CONNECTED) {
4112b30ab791SAndreas Gruenbacher 			if (drbd_bm_total_weight(device) <= device->rs_failed)
4113b30ab791SAndreas Gruenbacher 				drbd_resync_finished(device);
411482bc0194SAndreas Gruenbacher 			return 0;
4115e9ef7bb6SLars Ellenberg 		}
4116e9ef7bb6SLars Ellenberg 	}
4117e9ef7bb6SLars Ellenberg 
411802b91b55SLars Ellenberg 	/* explicit verify finished notification, stop sector reached. */
411902b91b55SLars Ellenberg 	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
412002b91b55SLars Ellenberg 	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
4121b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
4122b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
412358ffa580SLars Ellenberg 		return 0;
412402b91b55SLars Ellenberg 	}
412502b91b55SLars Ellenberg 
4126e9ef7bb6SLars Ellenberg 	/* peer says his disk is inconsistent, while we think it is uptodate,
4127e9ef7bb6SLars Ellenberg 	 * and this happens while the peer still thinks we have a sync going on,
4128e9ef7bb6SLars Ellenberg 	 * but we think we are already done with the sync.
4129e9ef7bb6SLars Ellenberg 	 * We ignore this to avoid flapping pdsk.
4130e9ef7bb6SLars Ellenberg 	 * This should not happen, if the peer is a recent version of drbd. */
4131e9ef7bb6SLars Ellenberg 	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4132e9ef7bb6SLars Ellenberg 	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4133e9ef7bb6SLars Ellenberg 		real_peer_disk = D_UP_TO_DATE;
4134e9ef7bb6SLars Ellenberg 
41354ac4aadaSLars Ellenberg 	if (ns.conn == C_WF_REPORT_PARAMS)
41364ac4aadaSLars Ellenberg 		ns.conn = C_CONNECTED;
4137b411b363SPhilipp Reisner 
413867531718SPhilipp Reisner 	if (peer_state.conn == C_AHEAD)
413967531718SPhilipp Reisner 		ns.conn = C_BEHIND;
414067531718SPhilipp Reisner 
4141b30ab791SAndreas Gruenbacher 	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4142b30ab791SAndreas Gruenbacher 	    get_ldev_if_state(device, D_NEGOTIATING)) {
4143b411b363SPhilipp Reisner 		int cr; /* consider resync */
4144b411b363SPhilipp Reisner 
4145b411b363SPhilipp Reisner 		/* if we established a new connection */
41464ac4aadaSLars Ellenberg 		cr  = (os.conn < C_CONNECTED);
4147b411b363SPhilipp Reisner 		/* if we had an established connection
4148b411b363SPhilipp Reisner 		 * and one of the nodes newly attaches a disk */
41494ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
4150b411b363SPhilipp Reisner 		       (peer_state.disk == D_NEGOTIATING ||
41514ac4aadaSLars Ellenberg 			os.disk == D_NEGOTIATING));
4152b411b363SPhilipp Reisner 		/* if we have both been inconsistent, and the peer has been
4153b411b363SPhilipp Reisner 		 * forced to be UpToDate with --overwrite-data */
4154b30ab791SAndreas Gruenbacher 		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
4155b411b363SPhilipp Reisner 		/* if we had been plain connected, and the admin requested to
4156b411b363SPhilipp Reisner 		 * start a sync by "invalidate" or "invalidate-remote" */
41574ac4aadaSLars Ellenberg 		cr |= (os.conn == C_CONNECTED &&
4158b411b363SPhilipp Reisner 				(peer_state.conn >= C_STARTING_SYNC_S &&
4159b411b363SPhilipp Reisner 				 peer_state.conn <= C_WF_BITMAP_T));
4160b411b363SPhilipp Reisner 
4161b411b363SPhilipp Reisner 		if (cr)
416269a22773SAndreas Gruenbacher 			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
4163b411b363SPhilipp Reisner 
4164b30ab791SAndreas Gruenbacher 		put_ldev(device);
41654ac4aadaSLars Ellenberg 		if (ns.conn == C_MASK) {
41664ac4aadaSLars Ellenberg 			ns.conn = C_CONNECTED;
4167b30ab791SAndreas Gruenbacher 			if (device->state.disk == D_NEGOTIATING) {
4168b30ab791SAndreas Gruenbacher 				drbd_force_state(device, NS(disk, D_FAILED));
4169b411b363SPhilipp Reisner 			} else if (peer_state.disk == D_NEGOTIATING) {
4170d0180171SAndreas Gruenbacher 				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
4171b411b363SPhilipp Reisner 				peer_state.disk = D_DISKLESS;
4172580b9767SLars Ellenberg 				real_peer_disk = D_DISKLESS;
4173b411b363SPhilipp Reisner 			} else {
41749f4fe9adSAndreas Gruenbacher 				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
417582bc0194SAndreas Gruenbacher 					return -EIO;
41760b0ba1efSAndreas Gruenbacher 				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
41779f4fe9adSAndreas Gruenbacher 				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
417882bc0194SAndreas Gruenbacher 				return -EIO;
4179b411b363SPhilipp Reisner 			}
4180b411b363SPhilipp Reisner 		}
4181b411b363SPhilipp Reisner 	}
4182b411b363SPhilipp Reisner 
41830500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
4184b30ab791SAndreas Gruenbacher 	if (os.i != drbd_read_state(device).i)
4185b411b363SPhilipp Reisner 		goto retry;
4186b30ab791SAndreas Gruenbacher 	clear_bit(CONSIDER_RESYNC, &device->flags);
4187b411b363SPhilipp Reisner 	ns.peer = peer_state.role;
4188b411b363SPhilipp Reisner 	ns.pdsk = real_peer_disk;
4189b411b363SPhilipp Reisner 	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
41904ac4aadaSLars Ellenberg 	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
4191b30ab791SAndreas Gruenbacher 		ns.disk = device->new_state_tmp.disk;
41924ac4aadaSLars Ellenberg 	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
4193b30ab791SAndreas Gruenbacher 	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4194b30ab791SAndreas Gruenbacher 	    test_bit(NEW_CUR_UUID, &device->flags)) {
41958554df1cSAndreas Gruenbacher 		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
4196481c6f50SPhilipp Reisner 		   for temporal network outages! */
41970500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
4198d0180171SAndreas Gruenbacher 		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
41999f4fe9adSAndreas Gruenbacher 		tl_clear(peer_device->connection);
4200b30ab791SAndreas Gruenbacher 		drbd_uuid_new_current(device);
4201b30ab791SAndreas Gruenbacher 		clear_bit(NEW_CUR_UUID, &device->flags);
42029f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
420382bc0194SAndreas Gruenbacher 		return -EIO;
4204481c6f50SPhilipp Reisner 	}
4205b30ab791SAndreas Gruenbacher 	rv = _drbd_set_state(device, ns, cs_flags, NULL);
4206b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
42070500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
4208b411b363SPhilipp Reisner 
4209b411b363SPhilipp Reisner 	if (rv < SS_SUCCESS) {
42109f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
421182bc0194SAndreas Gruenbacher 		return -EIO;
4212b411b363SPhilipp Reisner 	}
4213b411b363SPhilipp Reisner 
42144ac4aadaSLars Ellenberg 	if (os.conn > C_WF_REPORT_PARAMS) {
42154ac4aadaSLars Ellenberg 		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
4216b411b363SPhilipp Reisner 		    peer_state.disk != D_NEGOTIATING ) {
4217b411b363SPhilipp Reisner 			/* we want resync, peer has not yet decided to sync... */
4218b411b363SPhilipp Reisner 			/* Nowadays only used when forcing a node into primary role and
4219b411b363SPhilipp Reisner 			   setting its disk to UpToDate with that */
422069a22773SAndreas Gruenbacher 			drbd_send_uuids(peer_device);
422169a22773SAndreas Gruenbacher 			drbd_send_current_state(peer_device);
4222b411b363SPhilipp Reisner 		}
4223b411b363SPhilipp Reisner 	}
4224b411b363SPhilipp Reisner 
4225b30ab791SAndreas Gruenbacher 	clear_bit(DISCARD_MY_DATA, &device->flags);
4226b411b363SPhilipp Reisner 
4227b30ab791SAndreas Gruenbacher 	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
4228b411b363SPhilipp Reisner 
422982bc0194SAndreas Gruenbacher 	return 0;
4230b411b363SPhilipp Reisner }
4231b411b363SPhilipp Reisner 
4232bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4233b411b363SPhilipp Reisner {
42349f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4235b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4236e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
42374a76b161SAndreas Gruenbacher 
42389f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42399f4fe9adSAndreas Gruenbacher 	if (!peer_device)
42404a76b161SAndreas Gruenbacher 		return -EIO;
42419f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4242b411b363SPhilipp Reisner 
4243b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4244b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4245b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4246b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4247b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4248b411b363SPhilipp Reisner 
42490b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4250b411b363SPhilipp Reisner 
4251b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4252b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4253b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4254b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4255b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4256b411b363SPhilipp Reisner 
4257b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4258b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4259b411b363SPhilipp Reisner 
4260b30ab791SAndreas Gruenbacher 		put_ldev(device);
4261b411b363SPhilipp Reisner 	} else
4262d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4263b411b363SPhilipp Reisner 
426482bc0194SAndreas Gruenbacher 	return 0;
4265b411b363SPhilipp Reisner }
4266b411b363SPhilipp Reisner 
42672c46407dSAndreas Gruenbacher /**
42682c46407dSAndreas Gruenbacher  * receive_bitmap_plain
42692c46407dSAndreas Gruenbacher  *
42702c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
42712c46407dSAndreas Gruenbacher  * code upon failure.
42722c46407dSAndreas Gruenbacher  */
42732c46407dSAndreas Gruenbacher static int
427469a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4275e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4276b411b363SPhilipp Reisner {
427750d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
427869a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4279e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
428050d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4281e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
42822c46407dSAndreas Gruenbacher 	int err;
4283b411b363SPhilipp Reisner 
428450d0b1adSAndreas Gruenbacher 	if (want != size) {
428569a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
42862c46407dSAndreas Gruenbacher 		return -EIO;
4287b411b363SPhilipp Reisner 	}
4288b411b363SPhilipp Reisner 	if (want == 0)
42892c46407dSAndreas Gruenbacher 		return 0;
429069a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
429182bc0194SAndreas Gruenbacher 	if (err)
42922c46407dSAndreas Gruenbacher 		return err;
4293b411b363SPhilipp Reisner 
429469a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4295b411b363SPhilipp Reisner 
4296b411b363SPhilipp Reisner 	c->word_offset += num_words;
4297b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4298b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4299b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4300b411b363SPhilipp Reisner 
43012c46407dSAndreas Gruenbacher 	return 1;
4302b411b363SPhilipp Reisner }
4303b411b363SPhilipp Reisner 
4304a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4305a02d1240SAndreas Gruenbacher {
4306a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4307a02d1240SAndreas Gruenbacher }
4308a02d1240SAndreas Gruenbacher 
4309a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4310a02d1240SAndreas Gruenbacher {
4311a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4312a02d1240SAndreas Gruenbacher }
4313a02d1240SAndreas Gruenbacher 
4314a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4315a02d1240SAndreas Gruenbacher {
4316a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4317a02d1240SAndreas Gruenbacher }
4318a02d1240SAndreas Gruenbacher 
/**
 * recv_bm_rle_bits
 *
 * Decode one P_COMPRESSED_BITMAP payload: a variable-length-integer (VLI)
 * run-length encoded bit stream, alternating runs of set and cleared bits.
 * Runs of set bits are merged into the local bitmap via _drbd_bm_set_bits();
 * runs of cleared bits only advance the bit offset.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* next (up to) 64 bits of the input stream */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);	/* non-zero: first run is "set bits" */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window with the first 64 bits */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			/* the decoded code word claims more bits than the
			 * stream still holds - corrupted input */
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* another packet follows unless we decoded up to the very last bit */
	return (s != c->bm_bits);
}
4387b411b363SPhilipp Reisner 
43882c46407dSAndreas Gruenbacher /**
43892c46407dSAndreas Gruenbacher  * decode_bitmap_c
43902c46407dSAndreas Gruenbacher  *
43912c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
43922c46407dSAndreas Gruenbacher  * code upon failure.
43932c46407dSAndreas Gruenbacher  */
43942c46407dSAndreas Gruenbacher static int
439569a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4396b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4397c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4398c6d25cfeSPhilipp Reisner 		unsigned int len)
4399b411b363SPhilipp Reisner {
4400a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
440169a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4402b411b363SPhilipp Reisner 
4403b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4404b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4405b411b363SPhilipp Reisner 	 * during all our tests. */
4406b411b363SPhilipp Reisner 
440769a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
440869a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
44092c46407dSAndreas Gruenbacher 	return -EIO;
4410b411b363SPhilipp Reisner }
4411b411b363SPhilipp Reisner 
4412b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4413b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4414b411b363SPhilipp Reisner {
4415b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4416a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
441750d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
441850d0b1adSAndreas Gruenbacher 	unsigned int plain =
441950d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
442050d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
442150d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
442250d0b1adSAndreas Gruenbacher 	unsigned int r;
4423b411b363SPhilipp Reisner 
4424b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4425b411b363SPhilipp Reisner 	if (total == 0)
4426b411b363SPhilipp Reisner 		return;
4427b411b363SPhilipp Reisner 
4428b411b363SPhilipp Reisner 	/* don't report if not compressed */
4429b411b363SPhilipp Reisner 	if (total >= plain)
4430b411b363SPhilipp Reisner 		return;
4431b411b363SPhilipp Reisner 
4432b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4433b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4434b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4435b411b363SPhilipp Reisner 
4436b411b363SPhilipp Reisner 	if (r > 1000)
4437b411b363SPhilipp Reisner 		r = 1000;
4438b411b363SPhilipp Reisner 
4439b411b363SPhilipp Reisner 	r = 1000 - r;
4440d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4441b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4442b411b363SPhilipp Reisner 			direction,
4443b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4444b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4445b411b363SPhilipp Reisner 			total, r/10, r % 10);
4446b411b363SPhilipp Reisner }
4447b411b363SPhilipp Reisner 
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* loop over bitmap packets until the whole bitmap was received;
	 * err > 0: more packets expected, 0: done, < 0: failure */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* transfer statistics; index 1 counts plain, 0 compressed */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		/* fetch the header of the next bitmap packet ourselves */
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* as sync target, answer with our own bitmap */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4543b411b363SPhilipp Reisner 
4544bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4545b411b363SPhilipp Reisner {
45461ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4547e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4548b411b363SPhilipp Reisner 
4549bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4550b411b363SPhilipp Reisner }
4551b411b363SPhilipp Reisner 
4552bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4553b411b363SPhilipp Reisner {
4554b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4555b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4556bde89a9eSAndreas Gruenbacher 	drbd_tcp_quickack(connection->data.socket);
4557b411b363SPhilipp Reisner 
455882bc0194SAndreas Gruenbacher 	return 0;
4559b411b363SPhilipp Reisner }
4560b411b363SPhilipp Reisner 
4561bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
456273a01a18SPhilipp Reisner {
45639f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4564b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4565e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
45664a76b161SAndreas Gruenbacher 
45679f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
45689f4fe9adSAndreas Gruenbacher 	if (!peer_device)
45694a76b161SAndreas Gruenbacher 		return -EIO;
45709f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
457173a01a18SPhilipp Reisner 
4572b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4573f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4574f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4575f735e363SLars Ellenberg 	case C_BEHIND:
4576f735e363SLars Ellenberg 			break;
4577f735e363SLars Ellenberg 	default:
4578d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4579b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4580f735e363SLars Ellenberg 	}
4581f735e363SLars Ellenberg 
4582b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
458373a01a18SPhilipp Reisner 
458482bc0194SAndreas Gruenbacher 	return 0;
458573a01a18SPhilipp Reisner }
458673a01a18SPhilipp Reisner 
/* Dispatch table entry for one packet type on the data socket. */
struct data_cmd {
	int expect_payload;	/* may the packet carry data beyond pkt_size? */
	size_t pkt_size;	/* fixed (sub) header size drbdd() pre-reads */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
4592b411b363SPhilipp Reisner 
/* Dispatch table for the data socket, indexed by packet type.
 * Per entry: { expect_payload, fixed header size, handler }.
 * Packet types without an entry (NULL fn) are rejected by drbdd(). */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
462002918be2SPhilipp Reisner 
4621bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
4622b411b363SPhilipp Reisner {
462377351055SPhilipp Reisner 	struct packet_info pi;
462402918be2SPhilipp Reisner 	size_t shs; /* sub header size */
462582bc0194SAndreas Gruenbacher 	int err;
4626b411b363SPhilipp Reisner 
4627bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
4628deebe195SAndreas Gruenbacher 		struct data_cmd *cmd;
4629deebe195SAndreas Gruenbacher 
4630bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
4631944410e9SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header);
4632bde89a9eSAndreas Gruenbacher 		if (drbd_recv_header(connection, &pi))
463302918be2SPhilipp Reisner 			goto err_out;
463402918be2SPhilipp Reisner 
4635deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
46364a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
46371ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
46382fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
463902918be2SPhilipp Reisner 			goto err_out;
46400b33a916SLars Ellenberg 		}
4641b411b363SPhilipp Reisner 
4642e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
4643e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
46441ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
46452fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
4646c13f7e1aSLars Ellenberg 			goto err_out;
4647c13f7e1aSLars Ellenberg 		}
4648c13f7e1aSLars Ellenberg 
4649c13f7e1aSLars Ellenberg 		if (shs) {
4650944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
4651bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
4652a5c31904SAndreas Gruenbacher 			if (err)
465302918be2SPhilipp Reisner 				goto err_out;
4654e2857216SAndreas Gruenbacher 			pi.size -= shs;
4655b411b363SPhilipp Reisner 		}
465602918be2SPhilipp Reisner 
4657944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
4658bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
46594a76b161SAndreas Gruenbacher 		if (err) {
46601ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
46619f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
466202918be2SPhilipp Reisner 			goto err_out;
466302918be2SPhilipp Reisner 		}
466402918be2SPhilipp Reisner 	}
466582bc0194SAndreas Gruenbacher 	return;
466602918be2SPhilipp Reisner 
466702918be2SPhilipp Reisner     err_out:
4668bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
4669b411b363SPhilipp Reisner }
4670b411b363SPhilipp Reisner 
/* Tear down a lost connection: stop the ack receiver, close the socket,
 * clean up every peer device, and move the connection state towards
 * C_UNCONNECTED (or C_STANDALONE if a disconnect was requested). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* clean up each peer device; drop the RCU read lock around the
	 * (sleeping) per-device cleanup, keeping the device alive via
	 * its kref meanwhile */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* a Primary that lost sight of its peer's disk tries to outdate it */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4727360cc740SPhilipp Reisner 
/* Per-volume cleanup after connection loss: wait for in-flight peer
 * requests, cancel any resync, clear the transfer log (unless suspended)
 * and release pages still referenced by the network.  Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the resync timer, then run its function once ourselves */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4811b411b363SPhilipp Reisner 
4812b411b363SPhilipp Reisner /*
4813b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4814b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
4815b411b363SPhilipp Reisner  *
4816b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
4817b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
4818b411b363SPhilipp Reisner  *
4819b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
4820b411b363SPhilipp Reisner  */
/*
 * Send our P_CONNECTION_FEATURES handshake packet: the protocol version
 * range we support plus our feature flags.  The rest of the packet is
 * zeroed; per the comment above, the peer must ignore those fields.
 *
 * Returns 0 on success, -EIO if no send buffer could be prepared, or the
 * error from conn_send_command().
 */
static int drbd_send_features(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	struct p_connection_features *p;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	/* zero first: the reserved area must go out as zeros */
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	p->feature_flags = cpu_to_be32(PRO_FEATURES);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}
4836b411b363SPhilipp Reisner 
4837b411b363SPhilipp Reisner /*
4838b411b363SPhilipp Reisner  * return values:
4839b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
4840b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
4841b411b363SPhilipp Reisner  *  -1 peer talks different language,
4842b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
4843b411b363SPhilipp Reisner  */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our features first; a send failure is a transient network
	 * problem, so report 0 ("try again") rather than -1. */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	/* Anything other than the peer's feature packet at this point means
	 * we are not talking to a compatible DRBD: give up for good. */
	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert in place; fields are only used in host order below. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* Very old peers sent only protocol_min; treat max == 0 as "exactly
	 * protocol_min". */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest version both sides support, and on the
	 * intersection of the advertised feature flags. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4904b411b363SPhilipp Reisner 
4905b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Fallback when the kernel lacks HMAC support: authentication can never
 * succeed, so fail permanently (-1 == "don't try again"). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4912b411b363SPhilipp Reisner #else
4913b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
4914b10d96cbSJohannes Thoma 
4915b10d96cbSJohannes Thoma /* Return value:
4916b10d96cbSJohannes Thoma 	1 - auth succeeded,
4917b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
4918b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
4919b10d96cbSJohannes Thoma */
4920b10d96cbSJohannes Thoma 
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may be replaced
	 * concurrently by a configuration change. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* Step 1: send our random challenge. */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Step 2: receive the peer's challenge.
	 * rv = 0 below means "network trouble, retry"; rv = -1 means
	 * "protocol violation or auth failure, give up". */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Bound the peer-supplied size before allocating for it. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Reject a reflected challenge: a peer echoing our own challenge
	 * could otherwise bounce our response back at us. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Step 3: HMAC the peer's challenge with the shared secret and send
	 * the result back as our response. */
	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Step 4: receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	/* Step 5: compute what the peer's response *should* be (HMAC over
	 * our own challenge) and compare. */
	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	/* NOTE(review): these buffers hold secret-derived data, but are
	 * freed without being zeroized first — consider kzfree; confirm
	 * against the threat model. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
5094b411b363SPhilipp Reisner #endif
5095b411b363SPhilipp Reisner 
50968fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5097b411b363SPhilipp Reisner {
5098bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5099b411b363SPhilipp Reisner 	int h;
5100b411b363SPhilipp Reisner 
51011ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5102b411b363SPhilipp Reisner 
5103b411b363SPhilipp Reisner 	do {
5104bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5105b411b363SPhilipp Reisner 		if (h == 0) {
5106bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
510720ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5108b411b363SPhilipp Reisner 		}
5109b411b363SPhilipp Reisner 		if (h == -1) {
51101ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5111bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5112b411b363SPhilipp Reisner 		}
5113b411b363SPhilipp Reisner 	} while (h == 0);
5114b411b363SPhilipp Reisner 
511591fd4dadSPhilipp Reisner 	if (h > 0)
5116bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5117b411b363SPhilipp Reisner 
5118bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5119b411b363SPhilipp Reisner 
51201ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5121b411b363SPhilipp Reisner 	return 0;
5122b411b363SPhilipp Reisner }
5123b411b363SPhilipp Reisner 
5124b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5125b411b363SPhilipp Reisner 
5126bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5127b411b363SPhilipp Reisner {
5128e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5129b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5130b411b363SPhilipp Reisner 
5131b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5132bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5133b411b363SPhilipp Reisner 	} else {
5134bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
51351ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5136fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5137fc3b10a4SPhilipp Reisner 	}
5138bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5139e4f78edeSPhilipp Reisner 
51402735a594SAndreas Gruenbacher 	return 0;
5141fc3b10a4SPhilipp Reisner }
5142e4f78edeSPhilipp Reisner 
/*
 * Handle the peer's reply to a per-device state change request.
 * Records success/failure in the device flags and wakes the waiter.
 */
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Pre-protocol-100 peers answer a connection-wide request with this
	 * per-device packet; redirect to the connection-level handler. */
	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}
5171b411b363SPhilipp Reisner 
/* Keep-alive probe from the peer: answer immediately with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}
5177b411b363SPhilipp Reisner 
5178bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5179b411b363SPhilipp Reisner {
5180b411b363SPhilipp Reisner 	/* restore idle timeout */
5181bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5182bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5183bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5184b411b363SPhilipp Reisner 
51852735a594SAndreas Gruenbacher 	return 0;
5186b411b363SPhilipp Reisner }
5187b411b363SPhilipp Reisner 
/*
 * Checksum-based resync: the peer confirmed a block is already in sync,
 * so mark it in-sync locally and account it in the resync statistics.
 */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* this packet only exists with checksum-based resync (>= proto 89) */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* only touch the bitmap/disk if the local backing device is attached */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5217b411b363SPhilipp Reisner 
5218bc9c5c41SAndreas Gruenbacher static int
5219b30ab791SAndreas Gruenbacher validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
5220bc9c5c41SAndreas Gruenbacher 			      struct rb_root *root, const char *func,
5221bc9c5c41SAndreas Gruenbacher 			      enum drbd_req_event what, bool missing_ok)
5222b411b363SPhilipp Reisner {
5223b411b363SPhilipp Reisner 	struct drbd_request *req;
5224b411b363SPhilipp Reisner 	struct bio_and_error m;
5225b411b363SPhilipp Reisner 
52260500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5227b30ab791SAndreas Gruenbacher 	req = find_request(device, root, id, sector, missing_ok, func);
5228b411b363SPhilipp Reisner 	if (unlikely(!req)) {
52290500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
523085997675SAndreas Gruenbacher 		return -EIO;
5231b411b363SPhilipp Reisner 	}
5232b411b363SPhilipp Reisner 	__req_mod(req, what, &m);
52330500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5234b411b363SPhilipp Reisner 
5235b411b363SPhilipp Reisner 	if (m.bio)
5236b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
523785997675SAndreas Gruenbacher 	return 0;
5238b411b363SPhilipp Reisner }
5239b411b363SPhilipp Reisner 
/*
 * Write acknowledgment from the peer.  ID_SYNCER acks belong to resync
 * writes and only update the bitmap; everything else is mapped to the
 * matching request state-machine event and applied to the request.
 */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* resync request: no struct drbd_request behind it */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	/* map the ack type onto the request state-machine event */
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5285b411b363SPhilipp Reisner 
/*
 * Negative acknowledgment: the peer failed to apply a write.  For resync
 * writes just account the failure; for application writes feed NEG_ACKED
 * into the request state machine, and if the request is already gone,
 * mark the range out of sync so it gets resynced later.
 */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* resync request: no struct drbd_request behind it */
	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	/* missing_ok = true: see the comment below for why that can happen */
	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5321b411b363SPhilipp Reisner 
/*
 * The peer could not serve one of our read requests: log it and feed
 * NEG_ACKED into the corresponding entry of the read_requests tree.
 */
static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
	    (unsigned long long)sector, be32_to_cpu(p->blksize));

	/* missing_ok = false: a read request must still be in the tree */
	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}
5343b411b363SPhilipp Reisner 
/*
 * The peer answered a resync read request negatively (P_NEG_RS_DREPLY)
 * or canceled it (P_RS_CANCEL).  Either way the resync I/O for that
 * sector is finished; only the negative reply also counts as failed.
 */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5379b411b363SPhilipp Reisner 
/*
 * Barrier acknowledgment: release the acknowledged epoch from the
 * transfer log, then kick devices in Ahead mode that have drained their
 * in-flight application I/O so they can start resyncing back.
 */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* RCU protects the peer_devices idr against concurrent removal */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* schedule the Ahead -> SyncSource transition once, ~1s out */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5403b411b363SPhilipp Reisner 
5404bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
5405b411b363SPhilipp Reisner {
54069f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5407b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5408e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
540984b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
5410b411b363SPhilipp Reisner 	sector_t sector;
5411b411b363SPhilipp Reisner 	int size;
5412b411b363SPhilipp Reisner 
54139f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
54149f4fe9adSAndreas Gruenbacher 	if (!peer_device)
54152735a594SAndreas Gruenbacher 		return -EIO;
54169f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
54171952e916SAndreas Gruenbacher 
5418b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5419b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5420b411b363SPhilipp Reisner 
542169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5422b411b363SPhilipp Reisner 
5423b411b363SPhilipp Reisner 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5424b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
5425b411b363SPhilipp Reisner 	else
5426b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
5427b411b363SPhilipp Reisner 
5428b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
54292735a594SAndreas Gruenbacher 		return 0;
54301d53f09eSLars Ellenberg 
5431b30ab791SAndreas Gruenbacher 	drbd_rs_complete_io(device, sector);
5432b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5433b411b363SPhilipp Reisner 
5434b30ab791SAndreas Gruenbacher 	--device->ov_left;
5435ea5442afSLars Ellenberg 
5436ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
5437b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
5438b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
5439ea5442afSLars Ellenberg 
5440b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0) {
544184b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(*dw), GFP_NOIO);
544284b8c06bSAndreas Gruenbacher 		if (dw) {
544384b8c06bSAndreas Gruenbacher 			dw->w.cb = w_ov_finished;
544484b8c06bSAndreas Gruenbacher 			dw->device = device;
544584b8c06bSAndreas Gruenbacher 			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5446b411b363SPhilipp Reisner 		} else {
544784b8c06bSAndreas Gruenbacher 			drbd_err(device, "kmalloc(dw) failed.");
5448b30ab791SAndreas Gruenbacher 			ov_out_of_sync_print(device);
5449b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
5450b411b363SPhilipp Reisner 		}
5451b411b363SPhilipp Reisner 	}
5452b30ab791SAndreas Gruenbacher 	put_ldev(device);
54532735a594SAndreas Gruenbacher 	return 0;
5454b411b363SPhilipp Reisner }
5455b411b363SPhilipp Reisner 
5456bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
54570ced55a3SPhilipp Reisner {
54582735a594SAndreas Gruenbacher 	return 0;
54590ced55a3SPhilipp Reisner }
54600ced55a3SPhilipp Reisner 
5461668700b4SPhilipp Reisner struct meta_sock_cmd {
5462b411b363SPhilipp Reisner 	size_t pkt_size;
5463bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5464b411b363SPhilipp Reisner };
5465b411b363SPhilipp Reisner 
5466668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5467668700b4SPhilipp Reisner {
5468668700b4SPhilipp Reisner 	long t;
5469668700b4SPhilipp Reisner 	struct net_conf *nc;
5470668700b4SPhilipp Reisner 
5471668700b4SPhilipp Reisner 	rcu_read_lock();
5472668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5473668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5474668700b4SPhilipp Reisner 	rcu_read_unlock();
5475668700b4SPhilipp Reisner 
5476668700b4SPhilipp Reisner 	t *= HZ;
5477668700b4SPhilipp Reisner 	if (ping_timeout)
5478668700b4SPhilipp Reisner 		t /= 10;
5479668700b4SPhilipp Reisner 
5480668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5481668700b4SPhilipp Reisner }
5482668700b4SPhilipp Reisner 
5483668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5484668700b4SPhilipp Reisner {
5485668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5486668700b4SPhilipp Reisner }
5487668700b4SPhilipp Reisner 
5488668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5489668700b4SPhilipp Reisner {
5490668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5491668700b4SPhilipp Reisner }
5492668700b4SPhilipp Reisner 
5493668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5494e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5495e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5496b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5497b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5498b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5499d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5500b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5501b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5502b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5503b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5504b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5505b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5506b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
550702918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5508d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
55091952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
55101952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
5511b411b363SPhilipp Reisner };
5512b411b363SPhilipp Reisner 
55131c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5514b411b363SPhilipp Reisner {
5515bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5516668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
551777351055SPhilipp Reisner 	struct packet_info pi;
5518668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
5519257d0af6SPhilipp Reisner 	int rv;
5520bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
5521b411b363SPhilipp Reisner 	int received = 0;
5522bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
552352b061a4SAndreas Gruenbacher 	int expect   = header_size;
552444ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
55253990e04dSPhilipp Reisner 	struct sched_param param = { .sched_priority = 2 };
5526b411b363SPhilipp Reisner 
55273990e04dSPhilipp Reisner 	rv = sched_setscheduler(current, SCHED_RR, &param);
55283990e04dSPhilipp Reisner 	if (rv < 0)
5529668700b4SPhilipp Reisner 		drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
5530b411b363SPhilipp Reisner 
5531e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
553280822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
553344ed167dSPhilipp Reisner 
5534668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
553544ed167dSPhilipp Reisner 
5536bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5537bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
55381ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
5539841ce241SAndreas Gruenbacher 				goto reconnect;
5540841ce241SAndreas Gruenbacher 			}
5541668700b4SPhilipp Reisner 			set_ping_timeout(connection);
554244ed167dSPhilipp Reisner 			ping_timeout_active = true;
5543b411b363SPhilipp Reisner 		}
5544b411b363SPhilipp Reisner 
5545668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
5546bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5547b411b363SPhilipp Reisner 
5548b411b363SPhilipp Reisner 		/* Note:
5549b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
5550b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
5551b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
5552b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
5553b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
5554b411b363SPhilipp Reisner 		 * rv == expected: full header or command
5555b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
5556b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
5557b411b363SPhilipp Reisner 		 */
5558b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
5559b411b363SPhilipp Reisner 			received += rv;
5560b411b363SPhilipp Reisner 			buf	 += rv;
5561b411b363SPhilipp Reisner 		} else if (rv == 0) {
5562bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
5563b66623e3SPhilipp Reisner 				long t;
5564b66623e3SPhilipp Reisner 				rcu_read_lock();
5565bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
5566b66623e3SPhilipp Reisner 				rcu_read_unlock();
5567b66623e3SPhilipp Reisner 
5568bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
5569bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
5570b66623e3SPhilipp Reisner 						       t);
5571599377acSPhilipp Reisner 				if (t)
5572599377acSPhilipp Reisner 					break;
5573599377acSPhilipp Reisner 			}
55741ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
5575b411b363SPhilipp Reisner 			goto reconnect;
5576b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
5577cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
5578cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
5579668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
5580cb6518cbSLars Ellenberg 				continue;
5581f36af18cSLars Ellenberg 			if (ping_timeout_active) {
55821ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
5583b411b363SPhilipp Reisner 				goto reconnect;
5584b411b363SPhilipp Reisner 			}
5585bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
5586b411b363SPhilipp Reisner 			continue;
5587b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
5588668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
5589668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
5590668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
5591668700b4SPhilipp Reisner 			flush_signals(current);
5592b411b363SPhilipp Reisner 			continue;
5593b411b363SPhilipp Reisner 		} else {
55941ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
5595b411b363SPhilipp Reisner 			goto reconnect;
5596b411b363SPhilipp Reisner 		}
5597b411b363SPhilipp Reisner 
5598b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
5599bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
5600b411b363SPhilipp Reisner 				goto reconnect;
5601668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
5602668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
56031ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
56042fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
5605b411b363SPhilipp Reisner 				goto disconnect;
5606b411b363SPhilipp Reisner 			}
5607e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
560852b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
56091ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
561077351055SPhilipp Reisner 					pi.cmd, pi.size);
5611b411b363SPhilipp Reisner 				goto reconnect;
5612b411b363SPhilipp Reisner 			}
5613257d0af6SPhilipp Reisner 		}
5614b411b363SPhilipp Reisner 		if (received == expect) {
56152735a594SAndreas Gruenbacher 			bool err;
5616a4fbda8eSPhilipp Reisner 
5617bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
56182735a594SAndreas Gruenbacher 			if (err) {
56191ec861ebSAndreas Gruenbacher 				drbd_err(connection, "%pf failed\n", cmd->fn);
5620b411b363SPhilipp Reisner 				goto reconnect;
56211952e916SAndreas Gruenbacher 			}
5622b411b363SPhilipp Reisner 
5623bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
5624f36af18cSLars Ellenberg 
5625668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
5626668700b4SPhilipp Reisner 				set_idle_timeout(connection);
562744ed167dSPhilipp Reisner 				ping_timeout_active = false;
562844ed167dSPhilipp Reisner 			}
5629b411b363SPhilipp Reisner 
5630bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
5631b411b363SPhilipp Reisner 			received = 0;
563252b061a4SAndreas Gruenbacher 			expect	 = header_size;
5633b411b363SPhilipp Reisner 			cmd	 = NULL;
5634b411b363SPhilipp Reisner 		}
5635b411b363SPhilipp Reisner 	}
5636b411b363SPhilipp Reisner 
5637b411b363SPhilipp Reisner 	if (0) {
5638b411b363SPhilipp Reisner reconnect:
5639bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5640bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
5641b411b363SPhilipp Reisner 	}
5642b411b363SPhilipp Reisner 	if (0) {
5643b411b363SPhilipp Reisner disconnect:
5644bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5645b411b363SPhilipp Reisner 	}
5646b411b363SPhilipp Reisner 
5647668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
5648b411b363SPhilipp Reisner 
5649b411b363SPhilipp Reisner 	return 0;
5650b411b363SPhilipp Reisner }
5651668700b4SPhilipp Reisner 
5652668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
5653668700b4SPhilipp Reisner {
5654668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
5655668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
5656668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
5657668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
5658668700b4SPhilipp Reisner 	struct net_conf *nc;
5659668700b4SPhilipp Reisner 	int tcp_cork, err;
5660668700b4SPhilipp Reisner 
5661668700b4SPhilipp Reisner 	rcu_read_lock();
5662668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5663668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
5664668700b4SPhilipp Reisner 	rcu_read_unlock();
5665668700b4SPhilipp Reisner 
5666668700b4SPhilipp Reisner 	if (tcp_cork)
5667668700b4SPhilipp Reisner 		drbd_tcp_cork(connection->meta.socket);
5668668700b4SPhilipp Reisner 
5669668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
5670668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
5671668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
5672668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
5673668700b4SPhilipp Reisner 
5674668700b4SPhilipp Reisner 	if (err) {
5675668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5676668700b4SPhilipp Reisner 		return;
5677668700b4SPhilipp Reisner 	}
5678668700b4SPhilipp Reisner 
5679668700b4SPhilipp Reisner 	if (tcp_cork)
5680668700b4SPhilipp Reisner 		drbd_tcp_uncork(connection->meta.socket);
5681668700b4SPhilipp Reisner 
5682668700b4SPhilipp Reisner 	return;
5683668700b4SPhilipp Reisner }
5684