1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_receiver.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner  */
24b411b363SPhilipp Reisner 
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner 
28b411b363SPhilipp Reisner #include <asm/uaccess.h>
29b411b363SPhilipp Reisner #include <net/sock.h>
30b411b363SPhilipp Reisner 
31b411b363SPhilipp Reisner #include <linux/drbd.h>
32b411b363SPhilipp Reisner #include <linux/fs.h>
33b411b363SPhilipp Reisner #include <linux/file.h>
34b411b363SPhilipp Reisner #include <linux/in.h>
35b411b363SPhilipp Reisner #include <linux/mm.h>
36b411b363SPhilipp Reisner #include <linux/memcontrol.h>
37b411b363SPhilipp Reisner #include <linux/mm_inline.h>
38b411b363SPhilipp Reisner #include <linux/slab.h>
39b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
40b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
41b411b363SPhilipp Reisner #include <linux/unistd.h>
42b411b363SPhilipp Reisner #include <linux/vmalloc.h>
43b411b363SPhilipp Reisner #include <linux/random.h>
44b411b363SPhilipp Reisner #include <linux/string.h>
45b411b363SPhilipp Reisner #include <linux/scatterlist.h>
46b411b363SPhilipp Reisner #include "drbd_int.h"
47a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
48b411b363SPhilipp Reisner #include "drbd_req.h"
49b411b363SPhilipp Reisner #include "drbd_vli.h"
50b411b363SPhilipp Reisner 
5120c68fdeSLars Ellenberg #define PRO_FEATURES (FF_TRIM)
5220c68fdeSLars Ellenberg 
/* Decoded header of one received DRBD packet, filled in by the
 * receive path before the payload is dispatched. */
struct packet_info {
	enum drbd_packet cmd;	/* decoded packet type */
	unsigned int size;	/* payload size in bytes -- presumably excludes the header; verify against the header decoder */
	unsigned int vnr;	/* volume number the packet is addressed to */
	void *data;		/* pointer into the already received data */
};
5977351055SPhilipp Reisner 
/* Return values of drbd_may_finish_epoch(), describing what happened
 * to the epoch object that was passed in. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch is still in use, nothing was done */
	FE_DESTROYED,	/* epoch was finished and freed */
	FE_RECYCLED,	/* epoch object was finished and reused */
};
65b411b363SPhilipp Reisner 
66bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
67bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
6869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
69a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
70bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
7199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
72b411b363SPhilipp Reisner 
73b411b363SPhilipp Reisner 
74b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
75b411b363SPhilipp Reisner 
7645bb912bSLars Ellenberg /*
7745bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
7845bb912bSLars Ellenberg  * page->private being our "next" pointer.
7945bb912bSLars Ellenberg  */
8045bb912bSLars Ellenberg 
8145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
8245bb912bSLars Ellenberg  * Otherwise, don't modify head, and return NULL.
8345bb912bSLars Ellenberg  * Locking is the responsibility of the caller.
8445bb912bSLars Ellenberg  */
8545bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
8645bb912bSLars Ellenberg {
8745bb912bSLars Ellenberg 	struct page *page;
8845bb912bSLars Ellenberg 	struct page *tmp;
8945bb912bSLars Ellenberg 
9045bb912bSLars Ellenberg 	BUG_ON(!n);
9145bb912bSLars Ellenberg 	BUG_ON(!head);
9245bb912bSLars Ellenberg 
9345bb912bSLars Ellenberg 	page = *head;
9423ce4227SPhilipp Reisner 
9523ce4227SPhilipp Reisner 	if (!page)
9623ce4227SPhilipp Reisner 		return NULL;
9723ce4227SPhilipp Reisner 
9845bb912bSLars Ellenberg 	while (page) {
9945bb912bSLars Ellenberg 		tmp = page_chain_next(page);
10045bb912bSLars Ellenberg 		if (--n == 0)
10145bb912bSLars Ellenberg 			break; /* found sufficient pages */
10245bb912bSLars Ellenberg 		if (tmp == NULL)
10345bb912bSLars Ellenberg 			/* insufficient pages, don't use any of them. */
10445bb912bSLars Ellenberg 			return NULL;
10545bb912bSLars Ellenberg 		page = tmp;
10645bb912bSLars Ellenberg 	}
10745bb912bSLars Ellenberg 
10845bb912bSLars Ellenberg 	/* add end of list marker for the returned list */
10945bb912bSLars Ellenberg 	set_page_private(page, 0);
11045bb912bSLars Ellenberg 	/* actual return value, and adjustment of head */
11145bb912bSLars Ellenberg 	page = *head;
11245bb912bSLars Ellenberg 	*head = tmp;
11345bb912bSLars Ellenberg 	return page;
11445bb912bSLars Ellenberg }
11545bb912bSLars Ellenberg 
11645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
11745bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
11845bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
11945bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
12045bb912bSLars Ellenberg {
12145bb912bSLars Ellenberg 	struct page *tmp;
12245bb912bSLars Ellenberg 	int i = 1;
12345bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page)))
12445bb912bSLars Ellenberg 		++i, page = tmp;
12545bb912bSLars Ellenberg 	if (len)
12645bb912bSLars Ellenberg 		*len = i;
12745bb912bSLars Ellenberg 	return page;
12845bb912bSLars Ellenberg }
12945bb912bSLars Ellenberg 
13045bb912bSLars Ellenberg static int page_chain_free(struct page *page)
13145bb912bSLars Ellenberg {
13245bb912bSLars Ellenberg 	struct page *tmp;
13345bb912bSLars Ellenberg 	int i = 0;
13445bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
13545bb912bSLars Ellenberg 		put_page(page);
13645bb912bSLars Ellenberg 		++i;
13745bb912bSLars Ellenberg 	}
13845bb912bSLars Ellenberg 	return i;
13945bb912bSLars Ellenberg }
14045bb912bSLars Ellenberg 
14145bb912bSLars Ellenberg static void page_chain_add(struct page **head,
14245bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
14345bb912bSLars Ellenberg {
14445bb912bSLars Ellenberg #if 1
14545bb912bSLars Ellenberg 	struct page *tmp;
14645bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
14745bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14845bb912bSLars Ellenberg #endif
14945bb912bSLars Ellenberg 
15045bb912bSLars Ellenberg 	/* add chain to head */
15145bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
15245bb912bSLars Ellenberg 	*head = chain_first;
15345bb912bSLars Ellenberg }
15445bb912bSLars Ellenberg 
/* Try to get @number pages, first from the preallocated drbd_pp_pool,
 * then from alloc_page().  Returns the page chain (linked via
 * page->private) on success, or NULL if not all @number pages could be
 * obtained; a partial kernel allocation is donated to the pool instead
 * of being returned. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link each new page in front of the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
200b411b363SPhilipp Reisner 
/* Move the leading run of finished peer requests from device->net_ee
 * onto @to_be_freed.  Caller must hold resource->req_lock. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
217b411b363SPhilipp Reisner 
218668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
219b411b363SPhilipp Reisner {
220b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
221db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
222b411b363SPhilipp Reisner 
2230500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
224b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2250500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
226a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
227b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
228b411b363SPhilipp Reisner }
229b411b363SPhilipp Reisner 
/* Reclaim lended net pages on all volumes of @connection.  The
 * peer_devices idr is walked under rcu_read_lock; a kref keeps each
 * device alive while the RCU read lock is temporarily dropped around
 * the per-device reclaim. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* skip devices that have not lended pages to the network stack */
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
249668700b4SPhilipp Reisner 
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device the pages are allocated (and accounted) for.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	/* snapshot the max-buffers limit from the current net config */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* full throttle interval elapsed without being woken:
		 * stop honoring max-buffers for this allocation */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
320b411b363SPhilipp Reisner 
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pick the accounting counter this chain was charged against */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* return pages to the system once the pool exceeds its target size */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* someone may be blocked in drbd_alloc_pages waiting for these pages */
	wake_up(&drbd_pp_wait);
}
349b411b363SPhilipp Reisner 
350b411b363SPhilipp Reisner /*
351b411b363SPhilipp Reisner You need to hold the req_lock:
352b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
353b411b363SPhilipp Reisner 
354b411b363SPhilipp Reisner You must not have the req_lock:
3553967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3560db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3577721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
358b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
359a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
360b411b363SPhilipp Reisner  drbd_clear_done_ee()
361b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
362b411b363SPhilipp Reisner */
363b411b363SPhilipp Reisner 
/* Allocate a peer request object, plus a page chain big enough to hold
 * @data_size bytes of payload (if @has_payload).  @id is the peer's
 * opaque block_id and is stored unmodified.  Returns NULL on allocation
 * failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	/* pages needed to hold data_size bytes, rounded up */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		/* only retry (block) if the caller's gfp_mask allows blocking */
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
410b411b363SPhilipp Reisner 
/* Free @peer_req and return its pages; @is_net selects which pp_in_use
 * counter the pages were accounted on.  May sleep. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* should not happen at this point; complete the AL io anyway
	 * rather than leaking the activity log reference */
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}
426b411b363SPhilipp Reisner 
427b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
428b411b363SPhilipp Reisner {
429b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
430db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
431b411b363SPhilipp Reisner 	int count = 0;
432b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
433b411b363SPhilipp Reisner 
4340500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
435b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4360500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
437b411b363SPhilipp Reisner 
438a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
439b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
440b411b363SPhilipp Reisner 		count++;
441b411b363SPhilipp Reisner 	}
442b411b363SPhilipp Reisner 	return count;
443b411b363SPhilipp Reisner }
444b411b363SPhilipp Reisner 
/*
 * Run the completion callback of every entry on device->done_ee, and
 * reclaim finished net_ee entries while we are at it.  Returns the
 * first callback error, 0 if all succeeded.
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	/* grab both lists under the lock, process them after dropping it */
	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		/* remember the first error, but still run all callbacks */
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
480b411b363SPhilipp Reisner 
/* Wait until @head becomes empty.  Caller must hold resource->req_lock;
 * the lock is dropped while sleeping and re-acquired before each
 * re-check and before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
496b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes resource->req_lock itself. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
504b411b363SPhilipp Reisner 
505dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
506b411b363SPhilipp Reisner {
507b411b363SPhilipp Reisner 	struct kvec iov = {
508b411b363SPhilipp Reisner 		.iov_base = buf,
509b411b363SPhilipp Reisner 		.iov_len = size,
510b411b363SPhilipp Reisner 	};
511b411b363SPhilipp Reisner 	struct msghdr msg = {
512b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
513b411b363SPhilipp Reisner 	};
514f730c848SAl Viro 	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
515b411b363SPhilipp Reisner }
516b411b363SPhilipp Reisner 
/* Receive @size bytes from the connection's data socket.  On a short
 * read or error the connection state is forced to C_BROKEN_PIPE -- except
 * for a clean peer shutdown right after we sent DISCONNECT ourselves,
 * which is waited out quietly.  Returns the kernel_recvmsg result. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			/* we initiated the disconnect: give the state machine
			 * up to ping_timeo to leave C_WF_REPORT_PARAMS */
			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			/* expected shutdown: skip the C_BROKEN_PIPE transition */
			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
549b411b363SPhilipp Reisner 
550bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
551c6967746SAndreas Gruenbacher {
552c6967746SAndreas Gruenbacher 	int err;
553c6967746SAndreas Gruenbacher 
554bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
555c6967746SAndreas Gruenbacher 	if (err != size) {
556c6967746SAndreas Gruenbacher 		if (err >= 0)
557c6967746SAndreas Gruenbacher 			err = -EIO;
558c6967746SAndreas Gruenbacher 	} else
559c6967746SAndreas Gruenbacher 		err = 0;
560c6967746SAndreas Gruenbacher 	return err;
561c6967746SAndreas Gruenbacher }
562c6967746SAndreas Gruenbacher 
563bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
564a5c31904SAndreas Gruenbacher {
565a5c31904SAndreas Gruenbacher 	int err;
566a5c31904SAndreas Gruenbacher 
567bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all(connection, buf, size);
568a5c31904SAndreas Gruenbacher 	if (err && !signal_pending(current))
5691ec861ebSAndreas Gruenbacher 		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
570a5c31904SAndreas Gruenbacher 	return err;
571a5c31904SAndreas Gruenbacher }
572a5c31904SAndreas Gruenbacher 
5735dbf1673SLars Ellenberg /* quoting tcp(7):
5745dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5755dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5765dbf1673SLars Ellenberg  * This is our wrapper to do so.
5775dbf1673SLars Ellenberg  */
5785dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5795dbf1673SLars Ellenberg 		unsigned int rcv)
5805dbf1673SLars Ellenberg {
5815dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5825dbf1673SLars Ellenberg 	if (snd) {
5835dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5845dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5855dbf1673SLars Ellenberg 	}
5865dbf1673SLars Ellenberg 	if (rcv) {
5875dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5885dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5895dbf1673SLars Ellenberg 	}
5905dbf1673SLars Ellenberg }
5915dbf1673SLars Ellenberg 
/* drbd_try_connect() - attempt an outgoing TCP connection to the peer
 *
 * Binds to the locally configured address first (with port 0, so the
 * kernel picks a free source port); this is needed for multihomed hosts
 * and to be able to use lo: interfaces for drbd.
 *
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, peer not yet reachable, pending signal) leave the connection
 * state alone so the caller can retry; unexpected errors after the bind
 * succeeded force the connection to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables under RCU; net_conf may disappear while
	 * we are tearing the connection down */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* sockaddr_in6 is large enough for both address families */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* clamp against the buffer we actually copy into (was sizeof(src_in6);
	 * same value since both are sockaddr_in6, but only by coincidence) */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	/* must happen before connect(), see tcp(7) */
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
679b411b363SPhilipp Reisner 
/* Context handed to the listen socket's sk_state_change callback, so an
 * incoming connection can wake up a waiter in drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket, set by prepare_listen_socket() */
	struct completion door_bell;	/* completed when a connection reaches TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* restored by unregister_state_change() */

};
6877a426fd8SPhilipp Reisner 
/* Replacement sk_state_change callback for the listen socket.
 * sk->sk_user_data points to our accept_wait_data (installed in
 * prepare_listen_socket() under sk_callback_lock).  Ring the door bell
 * once the connection is fully established, then chain to the socket's
 * original callback in every case. */
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}
6987a426fd8SPhilipp Reisner 
/* Create a TCP listen socket on the locally configured address and hook
 * drbd_incoming_connection() into its state-change callback, so that
 * drbd_wait_for_connect() can be woken by an incoming connection.
 *
 * On success, ad->s_listen owns the socket and 0 is returned.  On
 * failure the socket (if any) is released and -EIO is returned;
 * unexpected errors (not EAGAIN/EINTR/ERESTARTSYS) additionally force
 * the connection to C_DISCONNECTING. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* snapshot buffer sizes under RCU; net_conf may go away */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	/* sockaddr_in6 is large enough for both address families */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* buffer sizes must be set before listen(), see tcp(7) */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* swap in our state_change callback under the callback lock */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7611f3e509bSPhilipp Reisner 
/* Restore the socket's original sk_state_change callback and detach our
 * accept_wait_data; sk_callback_lock serializes this against a
 * concurrently running drbd_incoming_connection(). */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
769715306f6SAndreas Gruenbacher 
/* Wait for an incoming connection on the prepared listen socket and
 * accept it.  The wait time is connect_int seconds with random jitter
 * added, so that both peers do not keep colliding in lock-step.
 * Returns the established socket, or NULL on timeout/signal/error.
 * Unexpected accept errors force the connection to C_DISCONNECTING. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* drbd_incoming_connection() rings the door bell on TCP_ESTABLISHED */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited our callback; take it back off */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
806b411b363SPhilipp Reisner 
807bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
808b411b363SPhilipp Reisner 
809bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8109f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8119f5bdc33SAndreas Gruenbacher {
812bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8139f5bdc33SAndreas Gruenbacher 		return -EIO;
814bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
815b411b363SPhilipp Reisner }
816b411b363SPhilipp Reisner 
/* Read and decode the very first packet on a freshly accepted socket,
 * using 4x ping_timeo (in tenths of a second) as the receive timeout.
 * Returns the packet command (e.g. P_INITIAL_DATA, P_INITIAL_META) on
 * success, or a negative error code. */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		/* map a short read to an error */
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
844b411b363SPhilipp Reisner 
845b411b363SPhilipp Reisner /**
846b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
847b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
848b411b363SPhilipp Reisner  */
8495d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
850b411b363SPhilipp Reisner {
851b411b363SPhilipp Reisner 	int rr;
852b411b363SPhilipp Reisner 	char tb[4];
853b411b363SPhilipp Reisner 
854b411b363SPhilipp Reisner 	if (!*sock)
85581e84650SAndreas Gruenbacher 		return false;
856b411b363SPhilipp Reisner 
857dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
858b411b363SPhilipp Reisner 
859b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
86081e84650SAndreas Gruenbacher 		return true;
861b411b363SPhilipp Reisner 	} else {
862b411b363SPhilipp Reisner 		sock_release(*sock);
863b411b363SPhilipp Reisner 		*sock = NULL;
86481e84650SAndreas Gruenbacher 		return false;
865b411b363SPhilipp Reisner 	}
866b411b363SPhilipp Reisner }
8675d0b17f1SPhilipp Reisner 
8685d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8695d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8705d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8715d0b17f1SPhilipp Reisner {
8725d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8735d0b17f1SPhilipp Reisner 	int timeout;
8745d0b17f1SPhilipp Reisner 	bool ok;
8755d0b17f1SPhilipp Reisner 
8765d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8775d0b17f1SPhilipp Reisner 		return false;
8785d0b17f1SPhilipp Reisner 
8795d0b17f1SPhilipp Reisner 	rcu_read_lock();
8805d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8815d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8825d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8835d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8845d0b17f1SPhilipp Reisner 
8855d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8865d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8875d0b17f1SPhilipp Reisner 
8885d0b17f1SPhilipp Reisner 	return ok;
8895d0b17f1SPhilipp Reisner }
8905d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.
   Resets the per-volume sequence numbers, selects the state mutex to use
   (one connection-wide mutex before protocol 100, per-volume from 100 on),
   and sends the initial per-volume handshake packets.  Returns 0 on
   success or the first send error. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* fresh connection: restart the packet sequence numbering */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* initial handshake; stop at the first failing send */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
918b411b363SPhilipp Reisner 
/*
 * conn_connect() - establish the data and meta sockets to the peer
 *
 * Both peers connect actively and listen simultaneously; whoever
 * connects first provides the socket, and crossed connects are resolved
 * by the P_INITIAL_DATA / P_INITIAL_META greeting plus a coin flip.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* local bookkeeping for the two sockets until both are usable */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Loop until both sockets are established (or we give up):
	 * first try to connect actively, then wait for the peer. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; keep the
					 * accepted one, the coin flip below decides
					 * who backs off */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* no longer accepting; drop the listen socket */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* feature/protocol-version handshake */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-volume handshake; drop the RCU lock around drbd_connected()
	 * (it may sleep), keeping each device alive via its kref */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1155b411b363SPhilipp Reisner 
1156bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1157b411b363SPhilipp Reisner {
1158bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1159b411b363SPhilipp Reisner 
11600c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11610c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11620c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11630c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11641ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11650c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116602918be2SPhilipp Reisner 		}
11670c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11680c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11690c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11700c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1171e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1172e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1173e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1174b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1175eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1176e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1177e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1178e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1179e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1180e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
118177351055SPhilipp Reisner 		pi->vnr = 0;
118202918be2SPhilipp Reisner 	} else {
11831ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1184e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1185bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11868172f3e9SAndreas Gruenbacher 		return -EINVAL;
1187b411b363SPhilipp Reisner 	}
1188e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11898172f3e9SAndreas Gruenbacher 	return 0;
1190b411b363SPhilipp Reisner }
1191b411b363SPhilipp Reisner 
1192bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1193257d0af6SPhilipp Reisner {
1194bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
119569bc7bc3SAndreas Gruenbacher 	int err;
1196257d0af6SPhilipp Reisner 
1197bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1198a5c31904SAndreas Gruenbacher 	if (err)
119969bc7bc3SAndreas Gruenbacher 		return err;
1200257d0af6SPhilipp Reisner 
1201bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1202bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1203b411b363SPhilipp Reisner 
120469bc7bc3SAndreas Gruenbacher 	return err;
1205b411b363SPhilipp Reisner }
1206b411b363SPhilipp Reisner 
/* Flush the backing devices of all volumes of @connection.
 *
 * Only acts when the resource's write ordering policy is at least
 * WO_BDEV_FLUSH.  If any backing device reports a flush failure, the
 * whole resource is demoted to WO_DRAIN_IO and no further volumes are
 * flushed (the loop breaks on the first error).
 */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip volumes without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* Pin the device with a kref so we can drop the RCU
			 * read lock: blkdev_issue_flush() may sleep. */
			kref_get(&device->kref);
			rcu_read_unlock();

			/* Right now, we have only this one synchronous code path
			 * for flushes between request epochs.
			 * We may want to make those asynchronous,
			 * or at least parallelize the flushes to the volume devices.
			 */
			device->flush_jif = jiffies;
			set_bit(FLUSH_PENDING, &device->flags);
			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			clear_bit(FLUSH_PENDING, &device->flags);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			/* re-enter the RCU section before touching the idr again */
			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1250b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection the epoch belongs to.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * If the event brings @epoch to "non-empty, no active writes, barrier
 * number known" (or EV_CLEANUP is set), the epoch is finished: a barrier
 * ack is sent (unless cleaning up), and the epoch is either freed or —
 * when it is still the connection's current epoch — reset for reuse.
 * Finishing one epoch may cascade to the next epoch in the list.
 *
 * Returns FE_STILL_LIVE, or FE_DESTROYED / FE_RECYCLED once the first
 * epoch has been finished.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier bit */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock: sending the barrier ack may block */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* finished epoch is not the newest one: free it
				 * and continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* current epoch: reset it for reuse instead of freeing */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1326b411b363SPhilipp Reisner 
13278fe39aacSPhilipp Reisner static enum write_ordering_e
13288fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
13298fe39aacSPhilipp Reisner {
13308fe39aacSPhilipp Reisner 	struct disk_conf *dc;
13318fe39aacSPhilipp Reisner 
13328fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
13338fe39aacSPhilipp Reisner 
1334f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1335f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1336f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1337f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
13388fe39aacSPhilipp Reisner 
13398fe39aacSPhilipp Reisner 	return wo;
13408fe39aacSPhilipp Reisner }
13418fe39aacSPhilipp Reisner 
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	Backing device to take into account in addition to all
 *		currently attached devices; may be NULL.
 * @wo:		Write ordering method to try.
 *
 * The new method is @wo capped by the configuration of every attached
 * backing device (and @bdev, if given and not already attached); any
 * request other than an explicit WO_BDEV_FLUSH is additionally capped by
 * the previous method, i.e. can only lower it.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* only an explicit WO_BDEV_FLUSH may raise the method again */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL; /* already accounted for above */
			put_ldev(device);
		}
	}

	/* @bdev was not among the attached devices: apply its limits too */
	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1381b411b363SPhilipp Reisner 
/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @op:		request operation (REQ_OP_*)
 * @op_flags:	additional request flags, see bio_set_op_attrs()
 * @fault_type:	fault injection class, see drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned op, const unsigned op_flags,
			     const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of allocated bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* Trims emulated by explicit zero-out take a synchronous path here
	 * and complete the peer request themselves. */
	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, data_size >> 9, GFP_NOIO, false))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio_set_op_attrs(bio, op, op_flags);
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submission order is reversed below */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (op == REQ_OP_DISCARD) {
		/* no pages to add; just set the size of the region to discard */
		bio->bi_iter.bi_size = data_size;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* this bio is full; continue with a fresh one */
			goto next_bio;
		}
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* release every bio allocated so far; nothing was submitted */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
151245bb912bSLars Ellenberg 
1513b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1514db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
151553840641SAndreas Gruenbacher {
1516db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
151753840641SAndreas Gruenbacher 
1518b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
151953840641SAndreas Gruenbacher 	drbd_clear_interval(i);
152053840641SAndreas Gruenbacher 
15216c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
152253840641SAndreas Gruenbacher 	if (i->waiting)
1523b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
152453840641SAndreas Gruenbacher }
152553840641SAndreas Gruenbacher 
/* Wait until the active_ee list of every volume of @connection is empty.
 *
 * The RCU read lock is dropped around the (sleeping) wait for each
 * volume; a kref on the device keeps it alive across that window. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
154377fede51SPhilipp Reisner 
/* Handle a P_BARRIER packet from the peer: record the barrier number on
 * the current epoch, try to finish it, and — depending on the write
 * ordering method — start a new epoch (possibly after draining and
 * flushing).  Returns 0 on success, -EIO on an unknown ordering method. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		/* epoch was recycled in place: nothing more to do */
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* drain all pending writes, then flush the backing devices */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* only allocate a new epoch if the current one is non-empty */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* initialize the freshly allocated epoch */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1611b411b363SPhilipp Reisner 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
/* Read the payload of a data packet from the socket into a freshly
 * allocated peer request.  Handles an optional leading integrity digest
 * and P_TRIM packets (which carry no payload).  Returns the peer request,
 * or NULL on receive error, invalid size, digest mismatch, or allocation
 * failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		/* a digest precedes the payload when data integrity checking
		 * is configured */
		digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	if (trim) {
		/* trims carry the affected size in the packet, not as payload */
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim)
		return peer_req;

	/* receive the payload page by page into the peer request's pages */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (digest_size) {
		/* recompute the digest over the received data and compare */
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1704b411b363SPhilipp Reisner 
1705b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1706b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1707b411b363SPhilipp Reisner  */
170869a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1709b411b363SPhilipp Reisner {
1710b411b363SPhilipp Reisner 	struct page *page;
1711a5c31904SAndreas Gruenbacher 	int err = 0;
1712b411b363SPhilipp Reisner 	void *data;
1713b411b363SPhilipp Reisner 
1714c3470cdeSLars Ellenberg 	if (!data_size)
1715fc5be839SAndreas Gruenbacher 		return 0;
1716c3470cdeSLars Ellenberg 
171769a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1718b411b363SPhilipp Reisner 
1719b411b363SPhilipp Reisner 	data = kmap(page);
1720b411b363SPhilipp Reisner 	while (data_size) {
1721fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1722fc5be839SAndreas Gruenbacher 
172369a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1724a5c31904SAndreas Gruenbacher 		if (err)
1725b411b363SPhilipp Reisner 			break;
1726a5c31904SAndreas Gruenbacher 		data_size -= len;
1727b411b363SPhilipp Reisner 	}
1728b411b363SPhilipp Reisner 	kunmap(page);
172969a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
1730fc5be839SAndreas Gruenbacher 	return err;
1731b411b363SPhilipp Reisner }
1732b411b363SPhilipp Reisner 
/* Receive the payload of a data reply directly into the pages of the
 * request's master bio (a read served by the peer because we have no
 * usable local data).  Consumes and verifies the optional integrity
 * digest first.  Returns 0 on success, a negative error code otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* If data integrity checking is enabled, the digest precedes the
	 * payload on the wire: receive it and subtract its size. */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* Fill the bio's segments straight from the socket; each segment
	 * receives at most its own length, never more than what is left. */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		/* Recompute the checksum over the received bio and compare
		 * with what the peer sent. */
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* the bio must exactly cover the payload we were sent */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1780b411b363SPhilipp Reisner 
/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: on success, mark the area
 * in sync and ack it; on failure, record the failed range and send a
 * negative ack.  Either way, drop the "unacked" reference taken when
 * the request was received.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* resync requests never enter the conflict interval tree */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* matches the inc_unacked() in recv_resync_read() */
	dec_unacked(device);

	return err;
}
1809b411b363SPhilipp Reisner 
/* Read a resync data block from the socket and submit it as a local write.
 * On success the peer request is queued on device->sync_ee and submitted
 * to disk; completion is reported via e_end_resync_block().
 * The caller holds a local-disk reference (get_ldev); it is released here
 * on the error path, otherwise in the request's endio path. */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* submission failed: take the request back off sync_ee and undo */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1849b411b363SPhilipp Reisner 
1850668eebc6SAndreas Gruenbacher static struct drbd_request *
1851b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
1852bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
1853b411b363SPhilipp Reisner {
1854b411b363SPhilipp Reisner 	struct drbd_request *req;
1855668eebc6SAndreas Gruenbacher 
1856bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
1857bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
18585e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
1859668eebc6SAndreas Gruenbacher 		return req;
1860c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
1861d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
1862c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
1863c3afd8f5SAndreas Gruenbacher 	}
1864668eebc6SAndreas Gruenbacher 	return NULL;
1865668eebc6SAndreas Gruenbacher }
1866668eebc6SAndreas Gruenbacher 
/* Handle a P_DATA_REPLY packet: locate the pending read request this
 * reply answers and copy the payload into its master bio.
 * Returns 0 on success, -EIO on protocol or receive errors. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* look up our request by the block_id the peer echoed back */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1901b411b363SPhilipp Reisner 
/* Handle a P_RS_DATA_REPLY packet (resync data from the sync source).
 * If we have a usable local disk, submit the block as a local write;
 * otherwise drain the payload from the socket and send a negative ack. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies carry the ID_SYNCER magic, not a request address */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		/* still must consume the payload to keep the stream in sync */
		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account received resync sectors for resync-rate throttling */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
1936b411b363SPhilipp Reisner 
/* Requeue local writes in [sector, sector+size) that were postponed due
 * to a conflict with a peer write that has now completed.
 * Called with device->resource->req_lock held (see e_end_block()). */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* only our own (local) requests can be restarted */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* skip requests still in flight locally, and those that
		 * were not postponed in the first place */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
19557be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a peer write: send the appropriate (positive
 * or negative) ack if the protocol requires one, then remove the request
 * from the write-conflict interval tree and retire its epoch reference.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* while resyncing, a successful write may also be
			 * reported as P_RS_WRITE_ACK so it counts for sync */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2001b411b363SPhilipp Reisner 
2002a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2003b411b363SPhilipp Reisner {
20048050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2005a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2006a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
200799920dc5SAndreas Gruenbacher 	int err;
2008b411b363SPhilipp Reisner 
2009a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2010a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2011b411b363SPhilipp Reisner 
201299920dc5SAndreas Gruenbacher 	return err;
2013b411b363SPhilipp Reisner }
2014b411b363SPhilipp Reisner 
/* Tell the peer its conflicting write was superseded by a local one. */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2019b6a370baSPhilipp Reisner 
202099920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
20217be8da07SAndreas Gruenbacher {
2022a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2023a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2024a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
20257be8da07SAndreas Gruenbacher 
2026a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2027d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
20287be8da07SAndreas Gruenbacher }
20297be8da07SAndreas Gruenbacher 
20303e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
20313e394da1SAndreas Gruenbacher {
20323e394da1SAndreas Gruenbacher 	/*
20333e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
20343e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
20353e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
20363e394da1SAndreas Gruenbacher 	 */
20373e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
20383e394da1SAndreas Gruenbacher }
20393e394da1SAndreas Gruenbacher 
20403e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
20413e394da1SAndreas Gruenbacher {
20423e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
20433e394da1SAndreas Gruenbacher }
20443e394da1SAndreas Gruenbacher 
/* Advance our record of the peer's packet sequence number to peer_seq
 * (monotonic under 32-bit wrap-around).  Only relevant when conflict
 * resolution is active; wakes waiters in wait_for_and_update_peer_seq()
 * when this packet actually advanced the sequence. */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
20603e394da1SAndreas Gruenbacher 
2061d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2062d93f6302SLars Ellenberg {
2063d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2064d93f6302SLars Ellenberg }
2065d93f6302SLars Ellenberg 
2066d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
2067b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2068d93f6302SLars Ellenberg {
2069d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
2070b6a370baSPhilipp Reisner 	bool rv = 0;
2071b6a370baSPhilipp Reisner 
20720500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2073a8cd15baSAndreas Gruenbacher 	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2074d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
2075d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
2076b6a370baSPhilipp Reisner 			rv = 1;
2077b6a370baSPhilipp Reisner 			break;
2078b6a370baSPhilipp Reisner 		}
2079b6a370baSPhilipp Reisner 	}
20800500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2081b6a370baSPhilipp Reisner 
2082b6a370baSPhilipp Reisner 	return rv;
2083b6a370baSPhilipp Reisner }
2084b6a370baSPhilipp Reisner 
2085b411b363SPhilipp Reisner /* Called from receive_Data.
2086b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2087b411b363SPhilipp Reisner  *
2088b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2089b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2090b411b363SPhilipp Reisner  * been sent.
2091b411b363SPhilipp Reisner  *
2092b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2093b411b363SPhilipp Reisner  *
2094b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2095b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2096b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2097b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2098b411b363SPhilipp Reisner  *
2099b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2100b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2101b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2102b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2103b411b363SPhilipp Reisner  *
2104b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2105b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* sequencing only matters when we must resolve write conflicts */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* we are the logically next packet (or older): record the
		 * sequence number and proceed */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* drop the spinlock while sleeping; update_peer_seq() will
		 * wake us when the missing acks have been processed */
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2153b411b363SPhilipp Reisner 
2154688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2155688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2156688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2157bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf)
215876d2e7ecSPhilipp Reisner {
215976d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
216076d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
2161bb3cc85eSMike Christie 		(dpf & DP_FLUSH ? REQ_FLUSH : 0);
2162bb3cc85eSMike Christie }
2163bb3cc85eSMike Christie 
2164bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf)
2165bb3cc85eSMike Christie {
2166bb3cc85eSMike Christie 	if (dpf & DP_DISCARD)
2167bb3cc85eSMike Christie 		return REQ_OP_DISCARD;
2168bb3cc85eSMike Christie 	else
2169bb3cc85eSMike Christie 		return REQ_OP_WRITE;
217076d2e7ecSPhilipp Reisner }
217176d2e7ecSPhilipp Reisner 
/* Fail (NEG_ACK) all postponed local writes overlapping
 * [sector, sector+size).  Called with device->resource->req_lock held;
 * the lock is dropped while completing each master bio and reacquired
 * afterwards, so the overlap scan restarts from scratch every time
 * (the tree may have changed while the lock was released). */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* must not complete the bio under the spinlock */
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
21967be8da07SAndreas Gruenbacher 
/*
 * handle_write_conflicts() - resolve a peer write that overlaps other
 * requests tracked in device->write_requests.
 *
 * Caller holds device->resource->req_lock (taken in receive_Data()).
 * NOTE(review): drbd_wait_misc() presumably drops and re-takes that lock
 * while sleeping, which is why both wait paths "goto repeat" and rescan
 * the interval tree from scratch — confirm against its definition.
 *
 * Return: 0 if the peer request may be submitted; -ENOENT if it was
 * resolved as superseded/retry (an ack was queued and the caller must
 * not submit); other errors abort the connection.  On any error the
 * request's interval is removed from the tree again before returning.
 */
2197b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device,
21987be8da07SAndreas Gruenbacher 				  struct drbd_peer_request *peer_req)
21997be8da07SAndreas Gruenbacher {
2200e33b32deSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
	/* Only the node with RESOLVE_CONFLICTS set decides superseded vs.
	 * retry; the other node waits for that decision (else branch). */
2201bde89a9eSAndreas Gruenbacher 	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
22027be8da07SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
22037be8da07SAndreas Gruenbacher 	const unsigned int size = peer_req->i.size;
22047be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
22057be8da07SAndreas Gruenbacher 	bool equal;
22067be8da07SAndreas Gruenbacher 	int err;
22077be8da07SAndreas Gruenbacher 
22087be8da07SAndreas Gruenbacher 	/*
22097be8da07SAndreas Gruenbacher 	 * Inserting the peer request into the write_requests tree will prevent
22107be8da07SAndreas Gruenbacher 	 * new conflicting local requests from being added.
22117be8da07SAndreas Gruenbacher 	 */
2212b30ab791SAndreas Gruenbacher 	drbd_insert_interval(&device->write_requests, &peer_req->i);
22137be8da07SAndreas Gruenbacher 
22147be8da07SAndreas Gruenbacher     repeat:
2215b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* our own interval also shows up in the overlap scan */
22167be8da07SAndreas Gruenbacher 		if (i == &peer_req->i)
22177be8da07SAndreas Gruenbacher 			continue;
		/* intervals already marked completed are no longer conflicts */
221808d0dabfSLars Ellenberg 		if (i->completed)
221908d0dabfSLars Ellenberg 			continue;
22207be8da07SAndreas Gruenbacher 
22217be8da07SAndreas Gruenbacher 		if (!i->local) {
22227be8da07SAndreas Gruenbacher 			/*
22237be8da07SAndreas Gruenbacher 			 * Our peer has sent a conflicting remote request; this
22247be8da07SAndreas Gruenbacher 			 * should not happen in a two-node setup.  Wait for the
22257be8da07SAndreas Gruenbacher 			 * earlier peer request to complete.
22267be8da07SAndreas Gruenbacher 			 */
2227b30ab791SAndreas Gruenbacher 			err = drbd_wait_misc(device, i);
22287be8da07SAndreas Gruenbacher 			if (err)
22297be8da07SAndreas Gruenbacher 				goto out;
22307be8da07SAndreas Gruenbacher 			goto repeat;
22317be8da07SAndreas Gruenbacher 		}
22327be8da07SAndreas Gruenbacher 
22337be8da07SAndreas Gruenbacher 		equal = i->sector == sector && i->size == size;
22347be8da07SAndreas Gruenbacher 		if (resolve_conflicts) {
22357be8da07SAndreas Gruenbacher 			/*
22367be8da07SAndreas Gruenbacher 			 * If the peer request is fully contained within the
2237d4dabbe2SLars Ellenberg 			 * overlapping request, it can be considered overwritten
2238d4dabbe2SLars Ellenberg 			 * and thus superseded; otherwise, it will be retried
2239d4dabbe2SLars Ellenberg 			 * once all overlapping requests have completed.
22407be8da07SAndreas Gruenbacher 			 */
2241d4dabbe2SLars Ellenberg 			bool superseded = i->sector <= sector && i->sector +
22427be8da07SAndreas Gruenbacher 				       (i->size >> 9) >= sector + (size >> 9);
22437be8da07SAndreas Gruenbacher 
22447be8da07SAndreas Gruenbacher 			if (!equal)
2245d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
22467be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u, "
22477be8da07SAndreas Gruenbacher 					       "assuming %s came first\n",
22487be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
22497be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size,
2250d4dabbe2SLars Ellenberg 					  superseded ? "local" : "remote");
22517be8da07SAndreas Gruenbacher 
			/* queue the P_SUPERSEDED / retry-write ack to be sent
			 * by the per-peer-device ack sender */
2252a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = superseded ? e_send_superseded :
22537be8da07SAndreas Gruenbacher 						   e_send_retry_write;
2254a8cd15baSAndreas Gruenbacher 			list_add_tail(&peer_req->w.list, &device->done_ee);
2255668700b4SPhilipp Reisner 			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
22567be8da07SAndreas Gruenbacher 
22577be8da07SAndreas Gruenbacher 			err = -ENOENT;
22587be8da07SAndreas Gruenbacher 			goto out;
22597be8da07SAndreas Gruenbacher 		} else {
22607be8da07SAndreas Gruenbacher 			struct drbd_request *req =
22617be8da07SAndreas Gruenbacher 				container_of(i, struct drbd_request, i);
22627be8da07SAndreas Gruenbacher 
22637be8da07SAndreas Gruenbacher 			if (!equal)
2264d0180171SAndreas Gruenbacher 				drbd_alert(device, "Concurrent writes detected: "
22657be8da07SAndreas Gruenbacher 					       "local=%llus +%u, remote=%llus +%u\n",
22667be8da07SAndreas Gruenbacher 					  (unsigned long long)i->sector, i->size,
22677be8da07SAndreas Gruenbacher 					  (unsigned long long)sector, size);
22687be8da07SAndreas Gruenbacher 
22697be8da07SAndreas Gruenbacher 			if (req->rq_state & RQ_LOCAL_PENDING ||
22707be8da07SAndreas Gruenbacher 			    !(req->rq_state & RQ_POSTPONED)) {
22717be8da07SAndreas Gruenbacher 				/*
22727be8da07SAndreas Gruenbacher 				 * Wait for the node with the discard flag to
2273d4dabbe2SLars Ellenberg 				 * decide if this request has been superseded
2274d4dabbe2SLars Ellenberg 				 * or needs to be retried.
2275d4dabbe2SLars Ellenberg 				 * Requests that have been superseded will
22767be8da07SAndreas Gruenbacher 				 * disappear from the write_requests tree.
22777be8da07SAndreas Gruenbacher 				 *
22787be8da07SAndreas Gruenbacher 				 * In addition, wait for the conflicting
22797be8da07SAndreas Gruenbacher 				 * request to finish locally before submitting
22807be8da07SAndreas Gruenbacher 				 * the conflicting peer request.
22817be8da07SAndreas Gruenbacher 				 */
2282b30ab791SAndreas Gruenbacher 				err = drbd_wait_misc(device, &req->i);
22837be8da07SAndreas Gruenbacher 				if (err) {
					/* interrupted while waiting: give up on
					 * the connection and fail all postponed
					 * requests overlapping this range */
2284e33b32deSAndreas Gruenbacher 					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
2285b30ab791SAndreas Gruenbacher 					fail_postponed_requests(device, sector, size);
22867be8da07SAndreas Gruenbacher 					goto out;
22877be8da07SAndreas Gruenbacher 				}
22887be8da07SAndreas Gruenbacher 				goto repeat;
22897be8da07SAndreas Gruenbacher 			}
22907be8da07SAndreas Gruenbacher 			/*
22917be8da07SAndreas Gruenbacher 			 * Remember to restart the conflicting requests after
22927be8da07SAndreas Gruenbacher 			 * the new peer request has completed.
22937be8da07SAndreas Gruenbacher 			 */
22947be8da07SAndreas Gruenbacher 			peer_req->flags |= EE_RESTART_REQUESTS;
22957be8da07SAndreas Gruenbacher 		}
22967be8da07SAndreas Gruenbacher 	}
22977be8da07SAndreas Gruenbacher 	err = 0;
22987be8da07SAndreas Gruenbacher 
22997be8da07SAndreas Gruenbacher     out:
	/* on any failure, take our interval out of the tree again so local
	 * requests are no longer blocked by it */
23007be8da07SAndreas Gruenbacher 	if (err)
2301b30ab791SAndreas Gruenbacher 		drbd_remove_epoch_entry_interval(device, peer_req);
23027be8da07SAndreas Gruenbacher 	return err;
23037be8da07SAndreas Gruenbacher }
23047be8da07SAndreas Gruenbacher 
/* mirrored write */
/*
 * receive_Data() - receive and apply a mirrored write (P_DATA / P_TRIM)
 * from the peer.
 *
 * Reads the payload into a peer request, accounts it in the current
 * epoch, sends protocol B/C acks as indicated by dp_flags, resolves
 * write conflicts when two primaries are allowed, and finally submits
 * the request to the local backing device.
 *
 * Return: 0 on success (I/O completion is handled asynchronously, see
 * the put_ldev note below), negative error to trigger a re-connect.
 */
2306bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
2307b411b363SPhilipp Reisner {
23089f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2309b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
231021ae5d7fSLars Ellenberg 	struct net_conf *nc;
2311b411b363SPhilipp Reisner 	sector_t sector;
2312db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2313e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
23147be8da07SAndreas Gruenbacher 	u32 peer_seq = be32_to_cpu(p->seq_num);
2315bb3cc85eSMike Christie 	int op, op_flags;
2316b411b363SPhilipp Reisner 	u32 dp_flags;
2317302bdeaeSPhilipp Reisner 	int err, tp;
23187be8da07SAndreas Gruenbacher 
23199f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
23209f4fe9adSAndreas Gruenbacher 	if (!peer_device)
23214a76b161SAndreas Gruenbacher 		return -EIO;
23229f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2323b411b363SPhilipp Reisner 
	/* No usable local disk: still keep the peer-seq and epoch accounting
	 * consistent, send a negative ack, and drain the payload from the
	 * socket so the stream stays in sync. */
2324b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
232582bc0194SAndreas Gruenbacher 		int err2;
2326b411b363SPhilipp Reisner 
232769a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
232869a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2329bde89a9eSAndreas Gruenbacher 		atomic_inc(&connection->current_epoch->epoch_size);
233069a22773SAndreas Gruenbacher 		err2 = drbd_drain_block(peer_device, pi->size);
233182bc0194SAndreas Gruenbacher 		if (!err)
233282bc0194SAndreas Gruenbacher 			err = err2;
233382bc0194SAndreas Gruenbacher 		return err;
2334b411b363SPhilipp Reisner 	}
2335b411b363SPhilipp Reisner 
2336fcefa62eSAndreas Gruenbacher 	/*
2337fcefa62eSAndreas Gruenbacher 	 * Corresponding put_ldev done either below (on various errors), or in
2338fcefa62eSAndreas Gruenbacher 	 * drbd_peer_request_endio, if we successfully submit the data at the
2339fcefa62eSAndreas Gruenbacher 	 * end of this function.
2340fcefa62eSAndreas Gruenbacher 	 */
2341b411b363SPhilipp Reisner 
2342b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2343a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
2344db830c46SAndreas Gruenbacher 	if (!peer_req) {
2345b30ab791SAndreas Gruenbacher 		put_ldev(device);
234682bc0194SAndreas Gruenbacher 		return -EIO;
2347b411b363SPhilipp Reisner 	}
2348b411b363SPhilipp Reisner 
2349a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_block;
235021ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
235121ae5d7fSLars Ellenberg 	peer_req->flags |= EE_APPLICATION;
2352b411b363SPhilipp Reisner 
	/* translate wire dp_flags into bio op / op_flags for submission */
2353688593c5SLars Ellenberg 	dp_flags = be32_to_cpu(p->dp_flags);
2354bb3cc85eSMike Christie 	op = wire_flags_to_bio_op(dp_flags);
2355bb3cc85eSMike Christie 	op_flags = wire_flags_to_bio_flags(dp_flags);
2356a0fb3c47SLars Ellenberg 	if (pi->cmd == P_TRIM) {
		/* discard request: no payload pages; fall back to explicit
		 * zero-out if the backing queue cannot discard */
2357a0fb3c47SLars Ellenberg 		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2358a0fb3c47SLars Ellenberg 		peer_req->flags |= EE_IS_TRIM;
2359a0fb3c47SLars Ellenberg 		if (!blk_queue_discard(q))
2360a0fb3c47SLars Ellenberg 			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2361a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->i.size > 0);
2362bb3cc85eSMike Christie 		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
2363a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->pages == NULL);
2364a0fb3c47SLars Ellenberg 	} else if (peer_req->pages == NULL) {
		/* zero-sized request: only valid as an explicit flush */
23650b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, peer_req->i.size == 0);
23660b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, dp_flags & DP_FLUSH);
2367a73ff323SLars Ellenberg 	}
2368688593c5SLars Ellenberg 
2369688593c5SLars Ellenberg 	if (dp_flags & DP_MAY_SET_IN_SYNC)
2370db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_MAY_SET_IN_SYNC;
2371688593c5SLars Ellenberg 
	/* account this write in the connection's current write-ordering epoch */
2372bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
2373bde89a9eSAndreas Gruenbacher 	peer_req->epoch = connection->current_epoch;
2374db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->epoch_size);
2375db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->active);
2376bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
2377b411b363SPhilipp Reisner 
2378302bdeaeSPhilipp Reisner 	rcu_read_lock();
237921ae5d7fSLars Ellenberg 	nc = rcu_dereference(peer_device->connection->net_conf);
238021ae5d7fSLars Ellenberg 	tp = nc->two_primaries;
	/* peers older than protocol 100 do not send ack policy in dp_flags;
	 * derive it from the configured wire protocol (C => write ack,
	 * B => receive ack, A => neither) */
238121ae5d7fSLars Ellenberg 	if (peer_device->connection->agreed_pro_version < 100) {
238221ae5d7fSLars Ellenberg 		switch (nc->wire_protocol) {
238321ae5d7fSLars Ellenberg 		case DRBD_PROT_C:
238421ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_WRITE_ACK;
238521ae5d7fSLars Ellenberg 			break;
238621ae5d7fSLars Ellenberg 		case DRBD_PROT_B:
238721ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_RECEIVE_ACK;
238821ae5d7fSLars Ellenberg 			break;
238921ae5d7fSLars Ellenberg 		}
239021ae5d7fSLars Ellenberg 	}
2391302bdeaeSPhilipp Reisner 	rcu_read_unlock();
239221ae5d7fSLars Ellenberg 
239321ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_WRITE_ACK) {
239421ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SEND_WRITE_ACK;
239521ae5d7fSLars Ellenberg 		inc_unacked(device);
239621ae5d7fSLars Ellenberg 		/* corresponding dec_unacked() in e_end_block()
239721ae5d7fSLars Ellenberg 		 * respective _drbd_clear_done_ee */
239821ae5d7fSLars Ellenberg 	}
239921ae5d7fSLars Ellenberg 
240021ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_RECEIVE_ACK) {
240121ae5d7fSLars Ellenberg 		/* I really don't like it that the receiver thread
240221ae5d7fSLars Ellenberg 		 * sends on the msock, but anyways */
24035dd2ca19SAndreas Gruenbacher 		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
240421ae5d7fSLars Ellenberg 	}
240521ae5d7fSLars Ellenberg 
2406302bdeaeSPhilipp Reisner 	if (tp) {
240721ae5d7fSLars Ellenberg 		/* two primaries implies protocol C */
240821ae5d7fSLars Ellenberg 		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
2409302bdeaeSPhilipp Reisner 		peer_req->flags |= EE_IN_INTERVAL_TREE;
241069a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
24117be8da07SAndreas Gruenbacher 		if (err)
2412b411b363SPhilipp Reisner 			goto out_interrupted;
24130500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
		/* -ENOENT means the request was resolved as superseded/retry;
		 * it has been handed to done_ee, so just report success */
2414b30ab791SAndreas Gruenbacher 		err = handle_write_conflicts(device, peer_req);
24157be8da07SAndreas Gruenbacher 		if (err) {
24160500813fSAndreas Gruenbacher 			spin_unlock_irq(&device->resource->req_lock);
24177be8da07SAndreas Gruenbacher 			if (err == -ENOENT) {
2418b30ab791SAndreas Gruenbacher 				put_ldev(device);
241982bc0194SAndreas Gruenbacher 				return 0;
2420b411b363SPhilipp Reisner 			}
2421b411b363SPhilipp Reisner 			goto out_interrupted;
2422b411b363SPhilipp Reisner 		}
2423b874d231SPhilipp Reisner 	} else {
242469a22773SAndreas Gruenbacher 		update_peer_seq(peer_device, peer_seq);
24250500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
2426b874d231SPhilipp Reisner 	}
2427a0fb3c47SLars Ellenberg 	/* if we use the zeroout fallback code, we process synchronously
2428a0fb3c47SLars Ellenberg 	 * and we wait for all pending requests, respectively wait for
2429a0fb3c47SLars Ellenberg 	 * active_ee to become empty in drbd_submit_peer_request();
2430a0fb3c47SLars Ellenberg 	 * better not add ourselves here. */
2431a0fb3c47SLars Ellenberg 	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2432b9ed7080SLars Ellenberg 		list_add_tail(&peer_req->w.list, &device->active_ee);
24330500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2434b411b363SPhilipp Reisner 
	/* as sync target, do not submit while a resync write to the same
	 * area is in flight (see overlapping_resync_write) */
2435b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_SYNC_TARGET)
2436b30ab791SAndreas Gruenbacher 		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2437b6a370baSPhilipp Reisner 
2438b30ab791SAndreas Gruenbacher 	if (device->state.pdsk < D_INCONSISTENT) {
2439b411b363SPhilipp Reisner 		/* In case we have the only disk of the cluster, */
		/* mark the range out-of-sync for the peer and cover the write
		 * with an activity-log reference (released via
		 * EE_CALL_AL_COMPLETE_IO on completion or in the error path) */
2440b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
2441db830c46SAndreas Gruenbacher 		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
24424dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &peer_req->i);
244321ae5d7fSLars Ellenberg 		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2444b411b363SPhilipp Reisner 	}
2445b411b363SPhilipp Reisner 
2446bb3cc85eSMike Christie 	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
2447bb3cc85eSMike Christie 				       DRBD_FAULT_DT_WR);
244882bc0194SAndreas Gruenbacher 	if (!err)
244982bc0194SAndreas Gruenbacher 		return 0;
2450b411b363SPhilipp Reisner 
245110f6d992SLars Ellenberg 	/* don't care for the reason here */
2452d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
	/* unwind: take the request off active_ee and out of the interval
	 * tree, and drop the AL reference taken above, if any */
24530500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2454a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
2455b30ab791SAndreas Gruenbacher 	drbd_remove_epoch_entry_interval(device, peer_req);
24560500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
245721ae5d7fSLars Ellenberg 	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
245821ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2459b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &peer_req->i);
246021ae5d7fSLars Ellenberg 	}
246122cc37a9SLars Ellenberg 
2462b411b363SPhilipp Reisner out_interrupted:
2463bde89a9eSAndreas Gruenbacher 	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
2464b30ab791SAndreas Gruenbacher 	put_ldev(device);
2465b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
246682bc0194SAndreas Gruenbacher 	return err;
2467b411b363SPhilipp Reisner }
2468b411b363SPhilipp Reisner 
24690f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
24700f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
24710f0601f4SLars Ellenberg  *
24720f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
24730f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
24740f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
24750f0601f4SLars Ellenberg  * activity, it obviously is "busy".
24760f0601f4SLars Ellenberg  *
24770f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
24780f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
24790f0601f4SLars Ellenberg  */
/*
 * Returns true if resync I/O for @sector should be throttled.
 * Even when the rate-based check (drbd_rs_c_min_rate_throttle) says
 * "throttle", we do NOT slow down if application IO is already waiting
 * on this resync extent (BME_PRIORITY) — our progress is then needed
 * for that application IO to complete — unless the caller explicitly
 * asked to throttle anyway via @throttle_if_app_is_waiting.
 */
2480ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2481ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2482e8299874SLars Ellenberg {
2483e8299874SLars Ellenberg 	struct lc_element *tmp;
2484ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2485e8299874SLars Ellenberg 
	/* not throttling at all, or caller wants the raw rate decision */
2486ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2487ad3fee79SLars Ellenberg 		return throttle;
2488e8299874SLars Ellenberg 
	/* look up the resync extent covering @sector in the resync LRU */
2489e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2490e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2491e8299874SLars Ellenberg 	if (tmp) {
2492e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2493e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2494e8299874SLars Ellenberg 			throttle = false;
2495ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2496ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2497e8299874SLars Ellenberg 	}
2498e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2499e8299874SLars Ellenberg 
2500e8299874SLars Ellenberg 	return throttle;
2501e8299874SLars Ellenberg }
2502e8299874SLars Ellenberg 
/*
 * drbd_rs_c_min_rate_throttle() - rate-based part of the resync
 * throttling decision.
 *
 * Returns true when the lower device looks busy (application activity
 * in the AL, or more sector traffic than our own resync submissions
 * account for) AND the short-term resync rate exceeds the configured
 * c_min_rate.  A c_min_rate of 0 disables the feature entirely.
 */
2503e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2504
{
25490f0601f4SLars Ellenberg 
/*
 * receive_DataRequest() - serve a read request from the peer:
 * application reads (P_DATA_REQUEST), resync reads (P_RS_DATA_REQUEST),
 * checksum-based resync (P_CSUM_RS_REQUEST) and online verify
 * (P_OV_REQUEST / P_OV_REPLY).
 *
 * Validates the peer-supplied sector/size, sends a negative ack (and
 * drains any payload) if we have no up-to-date local data, optionally
 * throttles resync reads, and submits a local READ whose completion
 * callback (set per request type) sends the reply.
 *
 * Return: 0 on success, negative error to trigger a re-connect.
 */
2550bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
2551b411b363SPhilipp Reisner {
25529f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2553b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2554b411b363SPhilipp Reisner 	sector_t sector;
25554a76b161SAndreas Gruenbacher 	sector_t capacity;
2556db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2557b411b363SPhilipp Reisner 	struct digest_info *di = NULL;
2558b18b37beSPhilipp Reisner 	int size, verb;
2559b411b363SPhilipp Reisner 	unsigned int fault_type;
2560e658983aSAndreas Gruenbacher 	struct p_block_req *p =	pi->data;
25614a76b161SAndreas Gruenbacher 
25629f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
25639f4fe9adSAndreas Gruenbacher 	if (!peer_device)
25644a76b161SAndreas Gruenbacher 		return -EIO;
25659f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2566b30ab791SAndreas Gruenbacher 	capacity = drbd_get_capacity(device->this_bdev);
2567b411b363SPhilipp Reisner 
2568b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2569b411b363SPhilipp Reisner 	size   = be32_to_cpu(p->blksize);
2570b411b363SPhilipp Reisner 
	/* sector/size come off the wire: reject non-positive, misaligned,
	 * oversized, or out-of-device requests before doing anything */
2571c670a398SAndreas Gruenbacher 	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2572d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2573b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
257482bc0194SAndreas Gruenbacher 		return -EINVAL;
2575b411b363SPhilipp Reisner 	}
2576b411b363SPhilipp Reisner 	if (sector + (size>>9) > capacity) {
2577d0180171SAndreas Gruenbacher 		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2578b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
257982bc0194SAndreas Gruenbacher 		return -EINVAL;
2580b411b363SPhilipp Reisner 	}
2581b411b363SPhilipp Reisner 
	/* no up-to-date local data: send the matching negative reply for
	 * each request type, then drain any payload still on the socket */
2582b30ab791SAndreas Gruenbacher 	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
2583b18b37beSPhilipp Reisner 		verb = 1;
2584e2857216SAndreas Gruenbacher 		switch (pi->cmd) {
2585b18b37beSPhilipp Reisner 		case P_DATA_REQUEST:
258669a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2587b18b37beSPhilipp Reisner 			break;
2588b18b37beSPhilipp Reisner 		case P_RS_DATA_REQUEST:
2589b18b37beSPhilipp Reisner 		case P_CSUM_RS_REQUEST:
2590b18b37beSPhilipp Reisner 		case P_OV_REQUEST:
259169a22773SAndreas Gruenbacher 			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
2592b18b37beSPhilipp Reisner 			break;
2593b18b37beSPhilipp Reisner 		case P_OV_REPLY:
2594b18b37beSPhilipp Reisner 			verb = 0;
2595b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
259669a22773SAndreas Gruenbacher 			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2597b18b37beSPhilipp Reisner 			break;
2598b18b37beSPhilipp Reisner 		default:
259949ba9b1bSAndreas Gruenbacher 			BUG();
2600b18b37beSPhilipp Reisner 		}
2601b18b37beSPhilipp Reisner 		if (verb && __ratelimit(&drbd_ratelimit_state))
2602d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not satisfy peer's read request, "
2603b411b363SPhilipp Reisner 			    "no local data.\n");
2604b18b37beSPhilipp Reisner 
2605a821cc4aSLars Ellenberg 		/* drain possibly payload */
260669a22773SAndreas Gruenbacher 		return drbd_drain_block(peer_device, pi->size);
2607b411b363SPhilipp Reisner 	}
2608b411b363SPhilipp Reisner 
2609b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2610b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
2611b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
2612a0fb3c47SLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2613a0fb3c47SLars Ellenberg 			true /* has real payload */, GFP_NOIO);
2614db830c46SAndreas Gruenbacher 	if (!peer_req) {
2615b30ab791SAndreas Gruenbacher 		put_ldev(device);
261682bc0194SAndreas Gruenbacher 		return -ENOMEM;
2617b411b363SPhilipp Reisner 	}
2618b411b363SPhilipp Reisner 
	/* pick the completion callback (sends the reply) per request type */
2619e2857216SAndreas Gruenbacher 	switch (pi->cmd) {
2620b411b363SPhilipp Reisner 	case P_DATA_REQUEST:
2621a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_data_req;
2622b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_DT_RD;
262380a40e43SLars Ellenberg 		/* application IO, don't drbd_rs_begin_io */
262421ae5d7fSLars Ellenberg 		peer_req->flags |= EE_APPLICATION;
262580a40e43SLars Ellenberg 		goto submit;
262680a40e43SLars Ellenberg 
2627b411b363SPhilipp Reisner 	case P_RS_DATA_REQUEST:
2628a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_rsdata_req;
2629b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
26305f9915bbSLars Ellenberg 		/* used in the sector offset progress display */
2631b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2632b411b363SPhilipp Reisner 		break;
2633b411b363SPhilipp Reisner 
2634b411b363SPhilipp Reisner 	case P_OV_REPLY:
2635b411b363SPhilipp Reisner 	case P_CSUM_RS_REQUEST:
		/* both carry a digest as payload; read it off the socket
		 * into a digest_info attached to the peer request */
2636b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2637e2857216SAndreas Gruenbacher 		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2638b411b363SPhilipp Reisner 		if (!di)
2639b411b363SPhilipp Reisner 			goto out_free_e;
2640b411b363SPhilipp Reisner 
2641e2857216SAndreas Gruenbacher 		di->digest_size = pi->size;
2642b411b363SPhilipp Reisner 		di->digest = (((char *)di)+sizeof(struct digest_info));
2643b411b363SPhilipp Reisner 
2644db830c46SAndreas Gruenbacher 		peer_req->digest = di;
2645db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_HAS_DIGEST;
2646c36c3cedSLars Ellenberg 
26479f4fe9adSAndreas Gruenbacher 		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2648b411b363SPhilipp Reisner 			goto out_free_e;
2649b411b363SPhilipp Reisner 
2650e2857216SAndreas Gruenbacher 		if (pi->cmd == P_CSUM_RS_REQUEST) {
26519f4fe9adSAndreas Gruenbacher 			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2652a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_csum_rs_req;
26535f9915bbSLars Ellenberg 			/* used in the sector offset progress display */
2654b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2655aaaba345SLars Ellenberg 			/* remember to report stats in drbd_resync_finished */
2656aaaba345SLars Ellenberg 			device->use_csums = true;
2657e2857216SAndreas Gruenbacher 		} else if (pi->cmd == P_OV_REPLY) {
26582649f080SLars Ellenberg 			/* track progress, we may need to throttle */
2659b30ab791SAndreas Gruenbacher 			atomic_add(size >> 9, &device->rs_sect_in);
2660a8cd15baSAndreas Gruenbacher 			peer_req->w.cb = w_e_end_ov_reply;
2661b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
26620f0601f4SLars Ellenberg 			/* drbd_rs_begin_io done when we sent this request,
26630f0601f4SLars Ellenberg 			 * but accounting still needs to be done. */
26640f0601f4SLars Ellenberg 			goto submit_for_resync;
2665b411b363SPhilipp Reisner 		}
2666b411b363SPhilipp Reisner 		break;
2667b411b363SPhilipp Reisner 
2668b411b363SPhilipp Reisner 	case P_OV_REQUEST:
		/* first verify request (ov_start_sector still unset) on a
		 * modern peer: initialize the online-verify cursor and the
		 * rate-sampling marks */
2669b30ab791SAndreas Gruenbacher 		if (device->ov_start_sector == ~(sector_t)0 &&
26709f4fe9adSAndreas Gruenbacher 		    peer_device->connection->agreed_pro_version >= 90) {
2671de228bbaSLars Ellenberg 			unsigned long now = jiffies;
2672de228bbaSLars Ellenberg 			int i;
2673b30ab791SAndreas Gruenbacher 			device->ov_start_sector = sector;
2674b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
2675b30ab791SAndreas Gruenbacher 			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2676b30ab791SAndreas Gruenbacher 			device->rs_total = device->ov_left;
2677de228bbaSLars Ellenberg 			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2678b30ab791SAndreas Gruenbacher 				device->rs_mark_left[i] = device->ov_left;
2679b30ab791SAndreas Gruenbacher 				device->rs_mark_time[i] = now;
2680de228bbaSLars Ellenberg 			}
2681d0180171SAndreas Gruenbacher 			drbd_info(device, "Online Verify start sector: %llu\n",
2682b411b363SPhilipp Reisner 					(unsigned long long)sector);
2683b411b363SPhilipp Reisner 		}
2684a8cd15baSAndreas Gruenbacher 		peer_req->w.cb = w_e_end_ov_req;
2685b411b363SPhilipp Reisner 		fault_type = DRBD_FAULT_RS_RD;
2686b411b363SPhilipp Reisner 		break;
2687b411b363SPhilipp Reisner 
2688b411b363SPhilipp Reisner 	default:
268949ba9b1bSAndreas Gruenbacher 		BUG();
2690b411b363SPhilipp Reisner 	}
2691b411b363SPhilipp Reisner 
26920f0601f4SLars Ellenberg 	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
26930f0601f4SLars Ellenberg 	 * wrt the receiver, but it is not as straightforward as it may seem.
26940f0601f4SLars Ellenberg 	 * Various places in the resync start and stop logic assume resync
26950f0601f4SLars Ellenberg 	 * requests are processed in order, requeuing this on the worker thread
26960f0601f4SLars Ellenberg 	 * introduces a bunch of new code for synchronization between threads.
26970f0601f4SLars Ellenberg 	 *
26980f0601f4SLars Ellenberg 	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
26990f0601f4SLars Ellenberg 	 * "forever", throttling after drbd_rs_begin_io will lock that extent
27000f0601f4SLars Ellenberg 	 * for application writes for the same time.  For now, just throttle
27010f0601f4SLars Ellenberg 	 * here, where the rest of the code expects the receiver to sleep for
27020f0601f4SLars Ellenberg 	 * a while, anyways.
27030f0601f4SLars Ellenberg 	 */
2704b411b363SPhilipp Reisner 
27050f0601f4SLars Ellenberg 	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
27060f0601f4SLars Ellenberg 	 * this defers syncer requests for some time, before letting at least
27070f0601f4SLars Ellenberg 	 * on request through.  The resync controller on the receiving side
27080f0601f4SLars Ellenberg 	 * will adapt to the incoming rate accordingly.
27090f0601f4SLars Ellenberg 	 *
27100f0601f4SLars Ellenberg 	 * We cannot throttle here if remote is Primary/SyncTarget:
27110f0601f4SLars Ellenberg 	 * we would also throttle its application reads.
27120f0601f4SLars Ellenberg 	 * In that case, throttling is done on the SyncTarget only.
27130f0601f4SLars Ellenberg 	 */
2714c5a2c150SLars Ellenberg 
2715c5a2c150SLars Ellenberg 	/* Even though this may be a resync request, we do add to "read_ee";
2716c5a2c150SLars Ellenberg 	 * "sync_ee" is only used for resync WRITEs.
2717c5a2c150SLars Ellenberg 	 * Add to list early, so debugfs can find this request
2718c5a2c150SLars Ellenberg 	 * even if we have to sleep below. */
2719c5a2c150SLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
2720c5a2c150SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
2721c5a2c150SLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
2722c5a2c150SLars Ellenberg 
2723944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
2724ad3fee79SLars Ellenberg 	if (device->state.peer != R_PRIMARY
2725ad3fee79SLars Ellenberg 	&& drbd_rs_should_slow_down(device, sector, false))
2726e3555d85SPhilipp Reisner 		schedule_timeout_uninterruptible(HZ/10);
2727944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_rs_begin_io);
2728b30ab791SAndreas Gruenbacher 	if (drbd_rs_begin_io(device, sector))
272980a40e43SLars Ellenberg 		goto out_free_e;
2730b411b363SPhilipp Reisner 
27310f0601f4SLars Ellenberg submit_for_resync:
	/* account the sectors we are about to read for the resync rate */
2732b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
27330f0601f4SLars Ellenberg 
273480a40e43SLars Ellenberg submit:
2735944410e9SLars Ellenberg 	update_receiver_timing_details(connection, drbd_submit_peer_request);
	/* corresponding dec_unacked() happens in the w.cb reply path */
2736b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2737bb3cc85eSMike Christie 	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
2738bb3cc85eSMike Christie 				     fault_type) == 0)
273982bc0194SAndreas Gruenbacher 		return 0;
2740b411b363SPhilipp Reisner 
274110f6d992SLars Ellenberg 	/* don't care for the reason here */
2742d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
2743c5a2c150SLars Ellenberg 
2744c5a2c150SLars Ellenberg out_free_e:
	/* unwind: off read_ee (harmless if never added), free digest via
	 * drbd_free_peer_req, drop the ldev reference */
27450500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2746a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
27470500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
274822cc37a9SLars Ellenberg 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
274922cc37a9SLars Ellenberg 
2750b30ab791SAndreas Gruenbacher 	put_ldev(device);
2751b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
275282bc0194SAndreas Gruenbacher 	return -EIO;
2753b411b363SPhilipp Reisner }
2754b411b363SPhilipp Reisner 
275569a22773SAndreas Gruenbacher /**
275669a22773SAndreas Gruenbacher  * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
275769a22773SAndreas Gruenbacher  */
275869a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
2759b411b363SPhilipp Reisner {
276069a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2761b411b363SPhilipp Reisner 	int self, peer, rv = -100;
2762b411b363SPhilipp Reisner 	unsigned long ch_self, ch_peer;
276344ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_0p;
2764b411b363SPhilipp Reisner 
2765b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & 1;
2766b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & 1;
2767b411b363SPhilipp Reisner 
2768b30ab791SAndreas Gruenbacher 	ch_peer = device->p_uuid[UI_SIZE];
2769b30ab791SAndreas Gruenbacher 	ch_self = device->comm_bm_set;
2770b411b363SPhilipp Reisner 
277144ed167dSPhilipp Reisner 	rcu_read_lock();
277269a22773SAndreas Gruenbacher 	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
277344ed167dSPhilipp Reisner 	rcu_read_unlock();
277444ed167dSPhilipp Reisner 	switch (after_sb_0p) {
2775b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2776b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2777b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
277844ed167dSPhilipp Reisner 	case ASB_VIOLENTLY:
2779d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2780b411b363SPhilipp Reisner 		break;
2781b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2782b411b363SPhilipp Reisner 		break;
2783b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2784b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2785b411b363SPhilipp Reisner 			rv = -1;
2786b411b363SPhilipp Reisner 			break;
2787b411b363SPhilipp Reisner 		}
2788b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2789b411b363SPhilipp Reisner 			rv =  1;
2790b411b363SPhilipp Reisner 			break;
2791b411b363SPhilipp Reisner 		}
2792b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2793b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2794b411b363SPhilipp Reisner 		if (self == 0 && peer == 1) {
2795b411b363SPhilipp Reisner 			rv = 1;
2796b411b363SPhilipp Reisner 			break;
2797b411b363SPhilipp Reisner 		}
2798b411b363SPhilipp Reisner 		if (self == 1 && peer == 0) {
2799b411b363SPhilipp Reisner 			rv = -1;
2800b411b363SPhilipp Reisner 			break;
2801b411b363SPhilipp Reisner 		}
2802b411b363SPhilipp Reisner 		/* Else fall through to one of the other strategies... */
2803d0180171SAndreas Gruenbacher 		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
2804b411b363SPhilipp Reisner 		     "Using discard-least-changes instead\n");
2805b411b363SPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2806b411b363SPhilipp Reisner 		if (ch_peer == 0 && ch_self == 0) {
280769a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2808b411b363SPhilipp Reisner 				? -1 : 1;
2809b411b363SPhilipp Reisner 			break;
2810b411b363SPhilipp Reisner 		} else {
2811b411b363SPhilipp Reisner 			if (ch_peer == 0) { rv =  1; break; }
2812b411b363SPhilipp Reisner 			if (ch_self == 0) { rv = -1; break; }
2813b411b363SPhilipp Reisner 		}
281444ed167dSPhilipp Reisner 		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
2815b411b363SPhilipp Reisner 			break;
2816b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2817b411b363SPhilipp Reisner 		if	(ch_self < ch_peer)
2818b411b363SPhilipp Reisner 			rv = -1;
2819b411b363SPhilipp Reisner 		else if (ch_self > ch_peer)
2820b411b363SPhilipp Reisner 			rv =  1;
2821b411b363SPhilipp Reisner 		else /* ( ch_self == ch_peer ) */
2822b411b363SPhilipp Reisner 		     /* Well, then use something else. */
282369a22773SAndreas Gruenbacher 			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
2824b411b363SPhilipp Reisner 				? -1 : 1;
2825b411b363SPhilipp Reisner 		break;
2826b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2827b411b363SPhilipp Reisner 		rv = -1;
2828b411b363SPhilipp Reisner 		break;
2829b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2830b411b363SPhilipp Reisner 		rv =  1;
2831b411b363SPhilipp Reisner 	}
2832b411b363SPhilipp Reisner 
2833b411b363SPhilipp Reisner 	return rv;
2834b411b363SPhilipp Reisner }
2835b411b363SPhilipp Reisner 
283669a22773SAndreas Gruenbacher /**
283769a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
283869a22773SAndreas Gruenbacher  */
283969a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
2840b411b363SPhilipp Reisner {
284169a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
28426184ea21SAndreas Gruenbacher 	int hg, rv = -100;
284344ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
2844b411b363SPhilipp Reisner 
284544ed167dSPhilipp Reisner 	rcu_read_lock();
284669a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
284744ed167dSPhilipp Reisner 	rcu_read_unlock();
284844ed167dSPhilipp Reisner 	switch (after_sb_1p) {
2849b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2850b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2851b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2852b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2853b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
285444ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2855d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2856b411b363SPhilipp Reisner 		break;
2857b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2858b411b363SPhilipp Reisner 		break;
2859b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
286069a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2861b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
2862b411b363SPhilipp Reisner 			rv = hg;
2863b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
2864b411b363SPhilipp Reisner 			rv = hg;
2865b411b363SPhilipp Reisner 		break;
2866b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
286769a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
2868b411b363SPhilipp Reisner 		break;
2869b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
2870b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
2871b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
287269a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2873b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
2874bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2875bb437946SAndreas Gruenbacher 
2876b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2877b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2878b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2879b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
2880bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2881b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
2882b411b363SPhilipp Reisner 			} else {
2883d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
2884b411b363SPhilipp Reisner 				rv = hg;
2885b411b363SPhilipp Reisner 			}
2886b411b363SPhilipp Reisner 		} else
2887b411b363SPhilipp Reisner 			rv = hg;
2888b411b363SPhilipp Reisner 	}
2889b411b363SPhilipp Reisner 
2890b411b363SPhilipp Reisner 	return rv;
2891b411b363SPhilipp Reisner }
2892b411b363SPhilipp Reisner 
289369a22773SAndreas Gruenbacher /**
289469a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
289569a22773SAndreas Gruenbacher  */
289669a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
2897b411b363SPhilipp Reisner {
289869a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
28996184ea21SAndreas Gruenbacher 	int hg, rv = -100;
290044ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
2901b411b363SPhilipp Reisner 
290244ed167dSPhilipp Reisner 	rcu_read_lock();
290369a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
290444ed167dSPhilipp Reisner 	rcu_read_unlock();
290544ed167dSPhilipp Reisner 	switch (after_sb_2p) {
2906b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
2907b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
2908b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
2909b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
2910b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
2911b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
2912b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
291344ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
2914d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
2915b411b363SPhilipp Reisner 		break;
2916b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
291769a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
2918b411b363SPhilipp Reisner 		break;
2919b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
2920b411b363SPhilipp Reisner 		break;
2921b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
292269a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
2923b411b363SPhilipp Reisner 		if (hg == -1) {
2924bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
2925bb437946SAndreas Gruenbacher 
2926b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2927b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
2928b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
2929b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
2930bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
2931b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
2932b411b363SPhilipp Reisner 			} else {
2933d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
2934b411b363SPhilipp Reisner 				rv = hg;
2935b411b363SPhilipp Reisner 			}
2936b411b363SPhilipp Reisner 		} else
2937b411b363SPhilipp Reisner 			rv = hg;
2938b411b363SPhilipp Reisner 	}
2939b411b363SPhilipp Reisner 
2940b411b363SPhilipp Reisner 	return rv;
2941b411b363SPhilipp Reisner }
2942b411b363SPhilipp Reisner 
2943b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
2944b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
2945b411b363SPhilipp Reisner {
2946b411b363SPhilipp Reisner 	if (!uuid) {
2947d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
2948b411b363SPhilipp Reisner 		return;
2949b411b363SPhilipp Reisner 	}
2950d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2951b411b363SPhilipp Reisner 	     text,
2952b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
2953b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
2954b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
2955b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
2956b411b363SPhilipp Reisner 	     (unsigned long long)bits,
2957b411b363SPhilipp Reisner 	     (unsigned long long)flags);
2958b411b363SPhilipp Reisner }
2959b411b363SPhilipp Reisner 
2960b411b363SPhilipp Reisner /*
2961b411b363SPhilipp Reisner   100	after split brain try auto recover
2962b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
2963b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
2964b411b363SPhilipp Reisner     0	no Sync
2965b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
2966b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
2967b411b363SPhilipp Reisner  -100	after split brain, disconnect
2968b411b363SPhilipp Reisner -1000	unrelated data
29694a23f264SPhilipp Reisner -1091   requires proto 91
29704a23f264SPhilipp Reisner -1096   requires proto 96
2971b411b363SPhilipp Reisner  */
297244a4d551SLars Ellenberg static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
2973b411b363SPhilipp Reisner {
297444a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
297544a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
2976b411b363SPhilipp Reisner 	u64 self, peer;
2977b411b363SPhilipp Reisner 	int i, j;
2978b411b363SPhilipp Reisner 
2979b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2980b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
2981b411b363SPhilipp Reisner 
2982b411b363SPhilipp Reisner 	*rule_nr = 10;
2983b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2984b411b363SPhilipp Reisner 		return 0;
2985b411b363SPhilipp Reisner 
2986b411b363SPhilipp Reisner 	*rule_nr = 20;
2987b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2988b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
2989b411b363SPhilipp Reisner 		return -2;
2990b411b363SPhilipp Reisner 
2991b411b363SPhilipp Reisner 	*rule_nr = 30;
2992b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
2993b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
2994b411b363SPhilipp Reisner 		return 2;
2995b411b363SPhilipp Reisner 
2996b411b363SPhilipp Reisner 	if (self == peer) {
2997b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
2998b411b363SPhilipp Reisner 
2999b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3000b411b363SPhilipp Reisner 
300144a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
30024a23f264SPhilipp Reisner 				return -1091;
3003b411b363SPhilipp Reisner 
3004b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3005b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3006d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3007b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3008b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3009b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3010b411b363SPhilipp Reisner 
3011b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3012b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3013b411b363SPhilipp Reisner 				*rule_nr = 34;
3014b411b363SPhilipp Reisner 			} else {
3015d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3016b411b363SPhilipp Reisner 				*rule_nr = 36;
3017b411b363SPhilipp Reisner 			}
3018b411b363SPhilipp Reisner 
3019b411b363SPhilipp Reisner 			return 1;
3020b411b363SPhilipp Reisner 		}
3021b411b363SPhilipp Reisner 
3022b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3023b411b363SPhilipp Reisner 
302444a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
30254a23f264SPhilipp Reisner 				return -1091;
3026b411b363SPhilipp Reisner 
3027b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3028b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3029d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3030b411b363SPhilipp Reisner 
3031b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3032b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3033b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3034b411b363SPhilipp Reisner 
3035b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3036b411b363SPhilipp Reisner 				*rule_nr = 35;
3037b411b363SPhilipp Reisner 			} else {
3038d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3039b411b363SPhilipp Reisner 				*rule_nr = 37;
3040b411b363SPhilipp Reisner 			}
3041b411b363SPhilipp Reisner 
3042b411b363SPhilipp Reisner 			return -1;
3043b411b363SPhilipp Reisner 		}
3044b411b363SPhilipp Reisner 
3045b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3046b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3047b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3048b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3049b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3050b411b363SPhilipp Reisner 		*rule_nr = 40;
3051b411b363SPhilipp Reisner 
3052b411b363SPhilipp Reisner 		switch (rct) {
3053b411b363SPhilipp Reisner 		case 0: /* !self_pri && !peer_pri */ return 0;
3054b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3055b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3056b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
305744a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3058b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3059b411b363SPhilipp Reisner 		}
3060b411b363SPhilipp Reisner 	}
3061b411b363SPhilipp Reisner 
3062b411b363SPhilipp Reisner 	*rule_nr = 50;
3063b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3064b411b363SPhilipp Reisner 	if (self == peer)
3065b411b363SPhilipp Reisner 		return -1;
3066b411b363SPhilipp Reisner 
3067b411b363SPhilipp Reisner 	*rule_nr = 51;
3068b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3069b411b363SPhilipp Reisner 	if (self == peer) {
307044a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3071b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3072b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3073b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3074b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3075b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3076b411b363SPhilipp Reisner 
307744a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
30784a23f264SPhilipp Reisner 				return -1091;
3079b411b363SPhilipp Reisner 
3080b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3081b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
30824a23f264SPhilipp Reisner 
3083d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3084b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
30854a23f264SPhilipp Reisner 
3086b411b363SPhilipp Reisner 			return -1;
3087b411b363SPhilipp Reisner 		}
3088b411b363SPhilipp Reisner 	}
3089b411b363SPhilipp Reisner 
3090b411b363SPhilipp Reisner 	*rule_nr = 60;
3091b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3092b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3093b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3094b411b363SPhilipp Reisner 		if (self == peer)
3095b411b363SPhilipp Reisner 			return -2;
3096b411b363SPhilipp Reisner 	}
3097b411b363SPhilipp Reisner 
3098b411b363SPhilipp Reisner 	*rule_nr = 70;
3099b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3100b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3101b411b363SPhilipp Reisner 	if (self == peer)
3102b411b363SPhilipp Reisner 		return 1;
3103b411b363SPhilipp Reisner 
3104b411b363SPhilipp Reisner 	*rule_nr = 71;
3105b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3106b411b363SPhilipp Reisner 	if (self == peer) {
310744a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3108b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3109b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3110b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3111b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3112b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3113b411b363SPhilipp Reisner 
311444a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
31154a23f264SPhilipp Reisner 				return -1091;
3116b411b363SPhilipp Reisner 
3117b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3118b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3119b411b363SPhilipp Reisner 
3120d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3121b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3122b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3123b411b363SPhilipp Reisner 
3124b411b363SPhilipp Reisner 			return 1;
3125b411b363SPhilipp Reisner 		}
3126b411b363SPhilipp Reisner 	}
3127b411b363SPhilipp Reisner 
3128b411b363SPhilipp Reisner 
3129b411b363SPhilipp Reisner 	*rule_nr = 80;
3130b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3131b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3132b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3133b411b363SPhilipp Reisner 		if (self == peer)
3134b411b363SPhilipp Reisner 			return 2;
3135b411b363SPhilipp Reisner 	}
3136b411b363SPhilipp Reisner 
3137b411b363SPhilipp Reisner 	*rule_nr = 90;
3138b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3139b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3140b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3141b411b363SPhilipp Reisner 		return 100;
3142b411b363SPhilipp Reisner 
3143b411b363SPhilipp Reisner 	*rule_nr = 100;
3144b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3145b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3146b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3147b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3148b411b363SPhilipp Reisner 			if (self == peer)
3149b411b363SPhilipp Reisner 				return -100;
3150b411b363SPhilipp Reisner 		}
3151b411b363SPhilipp Reisner 	}
3152b411b363SPhilipp Reisner 
3153b411b363SPhilipp Reisner 	return -1000;
3154b411b363SPhilipp Reisner }
3155b411b363SPhilipp Reisner 
3156b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3157b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3158b411b363SPhilipp Reisner  */
315969a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
316069a22773SAndreas Gruenbacher 					   enum drbd_role peer_role,
3161b411b363SPhilipp Reisner 					   enum drbd_disk_state peer_disk) __must_hold(local)
3162b411b363SPhilipp Reisner {
316369a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
3164b411b363SPhilipp Reisner 	enum drbd_conns rv = C_MASK;
3165b411b363SPhilipp Reisner 	enum drbd_disk_state mydisk;
316644ed167dSPhilipp Reisner 	struct net_conf *nc;
31676dff2902SAndreas Gruenbacher 	int hg, rule_nr, rr_conflict, tentative;
3168b411b363SPhilipp Reisner 
3169b30ab791SAndreas Gruenbacher 	mydisk = device->state.disk;
3170b411b363SPhilipp Reisner 	if (mydisk == D_NEGOTIATING)
3171b30ab791SAndreas Gruenbacher 		mydisk = device->new_state_tmp.disk;
3172b411b363SPhilipp Reisner 
3173d0180171SAndreas Gruenbacher 	drbd_info(device, "drbd_sync_handshake:\n");
31749f2247bbSPhilipp Reisner 
3175b30ab791SAndreas Gruenbacher 	spin_lock_irq(&device->ldev->md.uuid_lock);
3176b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3177b30ab791SAndreas Gruenbacher 	drbd_uuid_dump(device, "peer", device->p_uuid,
3178b30ab791SAndreas Gruenbacher 		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3179b411b363SPhilipp Reisner 
3180b30ab791SAndreas Gruenbacher 	hg = drbd_uuid_compare(device, &rule_nr);
3181b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->ldev->md.uuid_lock);
3182b411b363SPhilipp Reisner 
3183d0180171SAndreas Gruenbacher 	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3184b411b363SPhilipp Reisner 
3185b411b363SPhilipp Reisner 	if (hg == -1000) {
3186d0180171SAndreas Gruenbacher 		drbd_alert(device, "Unrelated data, aborting!\n");
3187b411b363SPhilipp Reisner 		return C_MASK;
3188b411b363SPhilipp Reisner 	}
31894a23f264SPhilipp Reisner 	if (hg < -1000) {
3190d0180171SAndreas Gruenbacher 		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3191b411b363SPhilipp Reisner 		return C_MASK;
3192b411b363SPhilipp Reisner 	}
3193b411b363SPhilipp Reisner 
3194b411b363SPhilipp Reisner 	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3195b411b363SPhilipp Reisner 	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
3196b411b363SPhilipp Reisner 		int f = (hg == -100) || abs(hg) == 2;
3197b411b363SPhilipp Reisner 		hg = mydisk > D_INCONSISTENT ? 1 : -1;
3198b411b363SPhilipp Reisner 		if (f)
3199b411b363SPhilipp Reisner 			hg = hg*2;
3200d0180171SAndreas Gruenbacher 		drbd_info(device, "Becoming sync %s due to disk states.\n",
3201b411b363SPhilipp Reisner 		     hg > 0 ? "source" : "target");
3202b411b363SPhilipp Reisner 	}
3203b411b363SPhilipp Reisner 
32043a11a487SAdam Gandelman 	if (abs(hg) == 100)
3205b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "initial-split-brain");
32063a11a487SAdam Gandelman 
320744ed167dSPhilipp Reisner 	rcu_read_lock();
320869a22773SAndreas Gruenbacher 	nc = rcu_dereference(peer_device->connection->net_conf);
320944ed167dSPhilipp Reisner 
321044ed167dSPhilipp Reisner 	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
3211b30ab791SAndreas Gruenbacher 		int pcount = (device->state.role == R_PRIMARY)
3212b411b363SPhilipp Reisner 			   + (peer_role == R_PRIMARY);
3213b411b363SPhilipp Reisner 		int forced = (hg == -100);
3214b411b363SPhilipp Reisner 
3215b411b363SPhilipp Reisner 		switch (pcount) {
3216b411b363SPhilipp Reisner 		case 0:
321769a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_0p(peer_device);
3218b411b363SPhilipp Reisner 			break;
3219b411b363SPhilipp Reisner 		case 1:
322069a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_1p(peer_device);
3221b411b363SPhilipp Reisner 			break;
3222b411b363SPhilipp Reisner 		case 2:
322369a22773SAndreas Gruenbacher 			hg = drbd_asb_recover_2p(peer_device);
3224b411b363SPhilipp Reisner 			break;
3225b411b363SPhilipp Reisner 		}
3226b411b363SPhilipp Reisner 		if (abs(hg) < 100) {
3227d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, %d primaries, "
3228b411b363SPhilipp Reisner 			     "automatically solved. Sync from %s node\n",
3229b411b363SPhilipp Reisner 			     pcount, (hg < 0) ? "peer" : "this");
3230b411b363SPhilipp Reisner 			if (forced) {
3231d0180171SAndreas Gruenbacher 				drbd_warn(device, "Doing a full sync, since"
3232b411b363SPhilipp Reisner 				     " UUIDs where ambiguous.\n");
3233b411b363SPhilipp Reisner 				hg = hg*2;
3234b411b363SPhilipp Reisner 			}
3235b411b363SPhilipp Reisner 		}
3236b411b363SPhilipp Reisner 	}
3237b411b363SPhilipp Reisner 
3238b411b363SPhilipp Reisner 	if (hg == -100) {
3239b30ab791SAndreas Gruenbacher 		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3240b411b363SPhilipp Reisner 			hg = -1;
3241b30ab791SAndreas Gruenbacher 		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3242b411b363SPhilipp Reisner 			hg = 1;
3243b411b363SPhilipp Reisner 
3244b411b363SPhilipp Reisner 		if (abs(hg) < 100)
3245d0180171SAndreas Gruenbacher 			drbd_warn(device, "Split-Brain detected, manually solved. "
3246b411b363SPhilipp Reisner 			     "Sync from %s node\n",
3247b411b363SPhilipp Reisner 			     (hg < 0) ? "peer" : "this");
3248b411b363SPhilipp Reisner 	}
324944ed167dSPhilipp Reisner 	rr_conflict = nc->rr_conflict;
32506dff2902SAndreas Gruenbacher 	tentative = nc->tentative;
325144ed167dSPhilipp Reisner 	rcu_read_unlock();
3252b411b363SPhilipp Reisner 
3253b411b363SPhilipp Reisner 	if (hg == -100) {
3254580b9767SLars Ellenberg 		/* FIXME this log message is not correct if we end up here
3255580b9767SLars Ellenberg 		 * after an attempted attach on a diskless node.
3256580b9767SLars Ellenberg 		 * We just refuse to attach -- well, we drop the "connection"
3257580b9767SLars Ellenberg 		 * to that disk, in a way... */
3258d0180171SAndreas Gruenbacher 		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3259b30ab791SAndreas Gruenbacher 		drbd_khelper(device, "split-brain");
3260b411b363SPhilipp Reisner 		return C_MASK;
3261b411b363SPhilipp Reisner 	}
3262b411b363SPhilipp Reisner 
3263b411b363SPhilipp Reisner 	if (hg > 0 && mydisk <= D_INCONSISTENT) {
3264d0180171SAndreas Gruenbacher 		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3265b411b363SPhilipp Reisner 		return C_MASK;
3266b411b363SPhilipp Reisner 	}
3267b411b363SPhilipp Reisner 
3268b411b363SPhilipp Reisner 	if (hg < 0 && /* by intention we do not use mydisk here. */
3269b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
327044ed167dSPhilipp Reisner 		switch (rr_conflict) {
3271b411b363SPhilipp Reisner 		case ASB_CALL_HELPER:
3272b30ab791SAndreas Gruenbacher 			drbd_khelper(device, "pri-lost");
3273b411b363SPhilipp Reisner 			/* fall through */
3274b411b363SPhilipp Reisner 		case ASB_DISCONNECT:
3275d0180171SAndreas Gruenbacher 			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3276b411b363SPhilipp Reisner 			return C_MASK;
3277b411b363SPhilipp Reisner 		case ASB_VIOLENTLY:
3278d0180171SAndreas Gruenbacher 			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3279b411b363SPhilipp Reisner 			     "assumption\n");
3280b411b363SPhilipp Reisner 		}
3281b411b363SPhilipp Reisner 	}
3282b411b363SPhilipp Reisner 
328369a22773SAndreas Gruenbacher 	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3284cf14c2e9SPhilipp Reisner 		if (hg == 0)
3285d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3286cf14c2e9SPhilipp Reisner 		else
3287d0180171SAndreas Gruenbacher 			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3288cf14c2e9SPhilipp Reisner 				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3289cf14c2e9SPhilipp Reisner 				 abs(hg) >= 2 ? "full" : "bit-map based");
3290cf14c2e9SPhilipp Reisner 		return C_MASK;
3291cf14c2e9SPhilipp Reisner 	}
3292cf14c2e9SPhilipp Reisner 
3293b411b363SPhilipp Reisner 	if (abs(hg) >= 2) {
3294d0180171SAndreas Gruenbacher 		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3295b30ab791SAndreas Gruenbacher 		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
329620ceb2b2SLars Ellenberg 					BM_LOCKED_SET_ALLOWED))
3297b411b363SPhilipp Reisner 			return C_MASK;
3298b411b363SPhilipp Reisner 	}
3299b411b363SPhilipp Reisner 
3300b411b363SPhilipp Reisner 	if (hg > 0) { /* become sync source. */
3301b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_S;
3302b411b363SPhilipp Reisner 	} else if (hg < 0) { /* become sync target */
3303b411b363SPhilipp Reisner 		rv = C_WF_BITMAP_T;
3304b411b363SPhilipp Reisner 	} else {
3305b411b363SPhilipp Reisner 		rv = C_CONNECTED;
3306b30ab791SAndreas Gruenbacher 		if (drbd_bm_total_weight(device)) {
3307d0180171SAndreas Gruenbacher 			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3308b30ab791SAndreas Gruenbacher 			     drbd_bm_total_weight(device));
3309b411b363SPhilipp Reisner 		}
3310b411b363SPhilipp Reisner 	}
3311b411b363SPhilipp Reisner 
3312b411b363SPhilipp Reisner 	return rv;
3313b411b363SPhilipp Reisner }
3314b411b363SPhilipp Reisner 
3315f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3316b411b363SPhilipp Reisner {
3317b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3318f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3319f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3320b411b363SPhilipp Reisner 
3321b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3322f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3323f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3324b411b363SPhilipp Reisner 
3325b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3326f179d76dSPhilipp Reisner 	return peer;
3327b411b363SPhilipp Reisner }
3328b411b363SPhilipp Reisner 
/* Handle a P_PROTOCOL (or P_PROTOCOL_UPDATE) packet from the peer.
 *
 * For an initial P_PROTOCOL we only *verify* that the peer's settings
 * (wire protocol, after-split-brain policies, two-primaries,
 * discard-my-data, data-integrity-alg) are compatible with our own
 * net_conf; any mismatch drops the connection.  In both cases we then
 * install the peer's integrity algorithm and publish an updated
 * net_conf via RCU.
 *
 * Returns 0 on success, a negative error code otherwise; on
 * incompatibility the connection is forced to C_DISCONNECTING.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_ahash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* All fields arrive in network byte order. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol version 87 the integrity algorithm name trails the
	 * fixed-size header; read it into a bounded, NUL-terminated buffer. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* Only an initial P_PROTOCOL packet is cross-checked against our own
	 * configuration; P_PROTOCOL_UPDATE announces an agreed change. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* net_conf is RCU-protected; hold the read lock across all
		 * compatibility checks so we compare against one snapshot. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* At most one side may request discard-my-data. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Scratch buffers for the received digest and our own
		 * verification digest, sized for the chosen algorithm. */
		hash_size = crypto_ahash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* data.mutex serializes against senders using net_conf; conf_update
	 * serializes against configuration changes.  Copy-modify-publish. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity machinery; ownership of the tfm and the
	 * digest buffers transfers to the connection here. */
	crypto_free_ahash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for all RCU readers of the old net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* crypto_free_ahash(NULL) and kfree(NULL) are no-ops, so this path is
	 * safe regardless of how far allocation got. */
	crypto_free_ahash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3478b411b363SPhilipp Reisner 
3479b411b363SPhilipp Reisner /* helper function
3480b411b363SPhilipp Reisner  * input: alg name, feature name
3481b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3482b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3483b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
34849534d671SHerbert Xu static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
3485b411b363SPhilipp Reisner 		const char *alg, const char *name)
3486b411b363SPhilipp Reisner {
34879534d671SHerbert Xu 	struct crypto_ahash *tfm;
3488b411b363SPhilipp Reisner 
3489b411b363SPhilipp Reisner 	if (!alg[0])
3490b411b363SPhilipp Reisner 		return NULL;
3491b411b363SPhilipp Reisner 
34929534d671SHerbert Xu 	tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
3493b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3494d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3495b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3496b411b363SPhilipp Reisner 		return tfm;
3497b411b363SPhilipp Reisner 	}
3498b411b363SPhilipp Reisner 	return tfm;
3499b411b363SPhilipp Reisner }
3500b411b363SPhilipp Reisner 
3501bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3502b411b363SPhilipp Reisner {
3503bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
35044a76b161SAndreas Gruenbacher 	int size = pi->size;
35054a76b161SAndreas Gruenbacher 
35064a76b161SAndreas Gruenbacher 	while (size) {
35074a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3508bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
35094a76b161SAndreas Gruenbacher 		if (s <= 0) {
35104a76b161SAndreas Gruenbacher 			if (s < 0)
35114a76b161SAndreas Gruenbacher 				return s;
35124a76b161SAndreas Gruenbacher 			break;
35134a76b161SAndreas Gruenbacher 		}
35144a76b161SAndreas Gruenbacher 		size -= s;
35154a76b161SAndreas Gruenbacher 	}
35164a76b161SAndreas Gruenbacher 	if (size)
35174a76b161SAndreas Gruenbacher 		return -EIO;
35184a76b161SAndreas Gruenbacher 	return 0;
35194a76b161SAndreas Gruenbacher }
35204a76b161SAndreas Gruenbacher 
35214a76b161SAndreas Gruenbacher /*
35224a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
35234a76b161SAndreas Gruenbacher  *
35244a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
35254a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
35264a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
35274a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
35284a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
35294a76b161SAndreas Gruenbacher  *
35304a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
35314a76b161SAndreas Gruenbacher  */
3532bde89a9eSAndreas Gruenbacher static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
35334a76b161SAndreas Gruenbacher {
35341ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
35352fcb8f30SAndreas Gruenbacher 		  cmdname(pi->cmd), pi->vnr);
3536bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
35374a76b161SAndreas Gruenbacher }
35384a76b161SAndreas Gruenbacher 
3539bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
35404a76b161SAndreas Gruenbacher {
35419f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3542b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3543e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3544b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
35459534d671SHerbert Xu 	struct crypto_ahash *verify_tfm = NULL;
35469534d671SHerbert Xu 	struct crypto_ahash *csums_tfm = NULL;
35472ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3548813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3549bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3550813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
3551778f271dSPhilipp Reisner 	int fifo_size = 0;
355282bc0194SAndreas Gruenbacher 	int err;
3553b411b363SPhilipp Reisner 
35549f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
35559f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3556bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
35579f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3558b411b363SPhilipp Reisner 
3559b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3560b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3561b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
35628e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
35638e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3564b411b363SPhilipp Reisner 
3565e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3566d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3567e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
356882bc0194SAndreas Gruenbacher 		return -EIO;
3569b411b363SPhilipp Reisner 	}
3570b411b363SPhilipp Reisner 
3571b411b363SPhilipp Reisner 	if (apv <= 88) {
3572e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3573e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35748e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3575e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3576e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35770b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
35788e26f9ccSPhilipp Reisner 	} else {
3579e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3580e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
35810b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3582b411b363SPhilipp Reisner 	}
3583b411b363SPhilipp Reisner 
3584b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3585e658983aSAndreas Gruenbacher 	p = pi->data;
3586b411b363SPhilipp Reisner 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3587b411b363SPhilipp Reisner 
35889f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
358982bc0194SAndreas Gruenbacher 	if (err)
359082bc0194SAndreas Gruenbacher 		return err;
3591b411b363SPhilipp Reisner 
35920500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
35939f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3594b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3595daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3596daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3597b30ab791SAndreas Gruenbacher 			put_ldev(device);
35980500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3599d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3600daeda1ccSPhilipp Reisner 			return -ENOMEM;
3601f399002eSLars Ellenberg 		}
3602b411b363SPhilipp Reisner 
3603b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3604daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3605daeda1ccSPhilipp Reisner 
36066394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3607813472ceSPhilipp Reisner 	}
3608b411b363SPhilipp Reisner 
3609b411b363SPhilipp Reisner 	if (apv >= 88) {
3610b411b363SPhilipp Reisner 		if (apv == 88) {
36115de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3612d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
36135de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3614b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3615813472ceSPhilipp Reisner 				err = -EIO;
3616813472ceSPhilipp Reisner 				goto reconnect;
3617b411b363SPhilipp Reisner 			}
3618b411b363SPhilipp Reisner 
36199f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3620813472ceSPhilipp Reisner 			if (err)
3621813472ceSPhilipp Reisner 				goto reconnect;
3622b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3623b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
36240b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3625b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3626b411b363SPhilipp Reisner 
3627b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3628b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3629b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
36300b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
36310b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3632b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3633b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3634b411b363SPhilipp Reisner 		}
3635b411b363SPhilipp Reisner 
36362ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3637b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3638d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
36392ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3640b411b363SPhilipp Reisner 				goto disconnect;
3641b411b363SPhilipp Reisner 			}
3642b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3643b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3644b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3645b411b363SPhilipp Reisner 				verify_tfm = NULL;
3646b411b363SPhilipp Reisner 				goto disconnect;
3647b411b363SPhilipp Reisner 			}
3648b411b363SPhilipp Reisner 		}
3649b411b363SPhilipp Reisner 
36502ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3651b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3652d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
36532ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3654b411b363SPhilipp Reisner 				goto disconnect;
3655b411b363SPhilipp Reisner 			}
3656b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3657b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3658b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3659b411b363SPhilipp Reisner 				csums_tfm = NULL;
3660b411b363SPhilipp Reisner 				goto disconnect;
3661b411b363SPhilipp Reisner 			}
3662b411b363SPhilipp Reisner 		}
3663b411b363SPhilipp Reisner 
3664813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3665daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3666daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3667daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3668daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3669778f271dSPhilipp Reisner 
3670daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3671b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3672813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3673813472ceSPhilipp Reisner 				if (!new_plan) {
3674d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3675b30ab791SAndreas Gruenbacher 					put_ldev(device);
3676778f271dSPhilipp Reisner 					goto disconnect;
3677778f271dSPhilipp Reisner 				}
3678778f271dSPhilipp Reisner 			}
36798e26f9ccSPhilipp Reisner 		}
3680b411b363SPhilipp Reisner 
368191fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
36822ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
36832ec91e0eSPhilipp Reisner 			if (!new_net_conf) {
3684d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new net_conf failed\n");
368591fd4dadSPhilipp Reisner 				goto disconnect;
368691fd4dadSPhilipp Reisner 			}
368791fd4dadSPhilipp Reisner 
36882ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
368991fd4dadSPhilipp Reisner 
3690b411b363SPhilipp Reisner 			if (verify_tfm) {
36912ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
36922ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
36939534d671SHerbert Xu 				crypto_free_ahash(peer_device->connection->verify_tfm);
36949f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
3695d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3696b411b363SPhilipp Reisner 			}
3697b411b363SPhilipp Reisner 			if (csums_tfm) {
36982ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
36992ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
37009534d671SHerbert Xu 				crypto_free_ahash(peer_device->connection->csums_tfm);
37019f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
3702d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3703b411b363SPhilipp Reisner 			}
3704bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
3705778f271dSPhilipp Reisner 		}
3706b411b363SPhilipp Reisner 	}
3707b411b363SPhilipp Reisner 
3708813472ceSPhilipp Reisner 	if (new_disk_conf) {
3709b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3710b30ab791SAndreas Gruenbacher 		put_ldev(device);
3711b411b363SPhilipp Reisner 	}
3712813472ceSPhilipp Reisner 
3713813472ceSPhilipp Reisner 	if (new_plan) {
3714b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
3715b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
3716813472ceSPhilipp Reisner 	}
3717daeda1ccSPhilipp Reisner 
37180500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3719daeda1ccSPhilipp Reisner 	synchronize_rcu();
3720daeda1ccSPhilipp Reisner 	if (new_net_conf)
3721daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
3722daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
3723813472ceSPhilipp Reisner 	kfree(old_plan);
3724daeda1ccSPhilipp Reisner 
372582bc0194SAndreas Gruenbacher 	return 0;
3726b411b363SPhilipp Reisner 
3727813472ceSPhilipp Reisner reconnect:
3728813472ceSPhilipp Reisner 	if (new_disk_conf) {
3729b30ab791SAndreas Gruenbacher 		put_ldev(device);
3730813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3731813472ceSPhilipp Reisner 	}
37320500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3733813472ceSPhilipp Reisner 	return -EIO;
3734813472ceSPhilipp Reisner 
3735b411b363SPhilipp Reisner disconnect:
3736813472ceSPhilipp Reisner 	kfree(new_plan);
3737813472ceSPhilipp Reisner 	if (new_disk_conf) {
3738b30ab791SAndreas Gruenbacher 		put_ldev(device);
3739813472ceSPhilipp Reisner 		kfree(new_disk_conf);
3740813472ceSPhilipp Reisner 	}
37410500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3742b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
3743b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
37449534d671SHerbert Xu 	crypto_free_ahash(csums_tfm);
3745b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
37469534d671SHerbert Xu 	crypto_free_ahash(verify_tfm);
37479f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
374882bc0194SAndreas Gruenbacher 	return -EIO;
3749b411b363SPhilipp Reisner }
3750b411b363SPhilipp Reisner 
3751b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
3752b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
3753b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
3754b411b363SPhilipp Reisner {
3755b411b363SPhilipp Reisner 	sector_t d;
3756b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
3757b411b363SPhilipp Reisner 		return;
3758b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
3759b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
3760d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
3761b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
3762b411b363SPhilipp Reisner }
3763b411b363SPhilipp Reisner 
3764bde89a9eSAndreas Gruenbacher static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
3765b411b363SPhilipp Reisner {
37669f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3767b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3768e658983aSAndreas Gruenbacher 	struct p_sizes *p = pi->data;
3769e96c9633SPhilipp Reisner 	enum determine_dev_size dd = DS_UNCHANGED;
37706a8d68b1SLars Ellenberg 	sector_t p_size, p_usize, p_csize, my_usize;
3771b411b363SPhilipp Reisner 	int ldsc = 0; /* local disk size changed */
3772e89b591cSPhilipp Reisner 	enum dds_flags ddsf;
3773b411b363SPhilipp Reisner 
37749f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
37759f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3776bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
37779f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
37784a76b161SAndreas Gruenbacher 
3779b411b363SPhilipp Reisner 	p_size = be64_to_cpu(p->d_size);
3780b411b363SPhilipp Reisner 	p_usize = be64_to_cpu(p->u_size);
37816a8d68b1SLars Ellenberg 	p_csize = be64_to_cpu(p->c_size);
3782b411b363SPhilipp Reisner 
3783b411b363SPhilipp Reisner 	/* just store the peer's disk size for now.
3784b411b363SPhilipp Reisner 	 * we still need to figure out whether we accept that. */
3785b30ab791SAndreas Gruenbacher 	device->p_size = p_size;
3786b411b363SPhilipp Reisner 
3787b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3788daeda1ccSPhilipp Reisner 		rcu_read_lock();
3789b30ab791SAndreas Gruenbacher 		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
3790daeda1ccSPhilipp Reisner 		rcu_read_unlock();
3791daeda1ccSPhilipp Reisner 
3792b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "lower level device sizes",
3793b30ab791SAndreas Gruenbacher 			   p_size, drbd_get_max_capacity(device->ldev));
3794b30ab791SAndreas Gruenbacher 		warn_if_differ_considerably(device, "user requested size",
3795daeda1ccSPhilipp Reisner 					    p_usize, my_usize);
3796b411b363SPhilipp Reisner 
3797b411b363SPhilipp Reisner 		/* if this is the first connect, or an otherwise expected
3798b411b363SPhilipp Reisner 		 * param exchange, choose the minimum */
3799b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_WF_REPORT_PARAMS)
3800daeda1ccSPhilipp Reisner 			p_usize = min_not_zero(my_usize, p_usize);
3801b411b363SPhilipp Reisner 
3802b411b363SPhilipp Reisner 		/* Never shrink a device with usable data during connect.
3803b411b363SPhilipp Reisner 		   But allow online shrinking if we are connected. */
3804b30ab791SAndreas Gruenbacher 		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3805b30ab791SAndreas Gruenbacher 		    drbd_get_capacity(device->this_bdev) &&
3806b30ab791SAndreas Gruenbacher 		    device->state.disk >= D_OUTDATED &&
3807b30ab791SAndreas Gruenbacher 		    device->state.conn < C_CONNECTED) {
3808d0180171SAndreas Gruenbacher 			drbd_err(device, "The peer's disk size is too small!\n");
38099f4fe9adSAndreas Gruenbacher 			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
3810b30ab791SAndreas Gruenbacher 			put_ldev(device);
381182bc0194SAndreas Gruenbacher 			return -EIO;
3812b411b363SPhilipp Reisner 		}
3813daeda1ccSPhilipp Reisner 
3814daeda1ccSPhilipp Reisner 		if (my_usize != p_usize) {
3815daeda1ccSPhilipp Reisner 			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3816daeda1ccSPhilipp Reisner 
3817daeda1ccSPhilipp Reisner 			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3818daeda1ccSPhilipp Reisner 			if (!new_disk_conf) {
3819d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new disk_conf failed\n");
3820b30ab791SAndreas Gruenbacher 				put_ldev(device);
3821daeda1ccSPhilipp Reisner 				return -ENOMEM;
3822daeda1ccSPhilipp Reisner 			}
3823daeda1ccSPhilipp Reisner 
38240500813fSAndreas Gruenbacher 			mutex_lock(&connection->resource->conf_update);
3825b30ab791SAndreas Gruenbacher 			old_disk_conf = device->ldev->disk_conf;
3826daeda1ccSPhilipp Reisner 			*new_disk_conf = *old_disk_conf;
3827daeda1ccSPhilipp Reisner 			new_disk_conf->disk_size = p_usize;
3828daeda1ccSPhilipp Reisner 
3829b30ab791SAndreas Gruenbacher 			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
38300500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3831daeda1ccSPhilipp Reisner 			synchronize_rcu();
3832daeda1ccSPhilipp Reisner 			kfree(old_disk_conf);
3833daeda1ccSPhilipp Reisner 
3834d0180171SAndreas Gruenbacher 			drbd_info(device, "Peer sets u_size to %lu sectors\n",
3835daeda1ccSPhilipp Reisner 				 (unsigned long)my_usize);
3836daeda1ccSPhilipp Reisner 		}
3837daeda1ccSPhilipp Reisner 
3838b30ab791SAndreas Gruenbacher 		put_ldev(device);
3839b411b363SPhilipp Reisner 	}
3840b411b363SPhilipp Reisner 
384120c68fdeSLars Ellenberg 	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
384220c68fdeSLars Ellenberg 	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
384320c68fdeSLars Ellenberg 	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
384420c68fdeSLars Ellenberg 	   drbd_reconsider_max_bio_size(), we can be sure that after
384520c68fdeSLars Ellenberg 	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
384620c68fdeSLars Ellenberg 
3847e89b591cSPhilipp Reisner 	ddsf = be16_to_cpu(p->dds_flags);
3848b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
38498fe39aacSPhilipp Reisner 		drbd_reconsider_max_bio_size(device, device->ldev);
3850b30ab791SAndreas Gruenbacher 		dd = drbd_determine_dev_size(device, ddsf, NULL);
3851b30ab791SAndreas Gruenbacher 		put_ldev(device);
3852e96c9633SPhilipp Reisner 		if (dd == DS_ERROR)
385382bc0194SAndreas Gruenbacher 			return -EIO;
3854b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
3855b411b363SPhilipp Reisner 	} else {
38566a8d68b1SLars Ellenberg 		/*
38576a8d68b1SLars Ellenberg 		 * I am diskless, need to accept the peer's *current* size.
38586a8d68b1SLars Ellenberg 		 * I must NOT accept the peers backing disk size,
38596a8d68b1SLars Ellenberg 		 * it may have been larger than mine all along...
38606a8d68b1SLars Ellenberg 		 *
38616a8d68b1SLars Ellenberg 		 * At this point, the peer knows more about my disk, or at
38626a8d68b1SLars Ellenberg 		 * least about what we last agreed upon, than myself.
38636a8d68b1SLars Ellenberg 		 * So if his c_size is less than his d_size, the most likely
38646a8d68b1SLars Ellenberg 		 * reason is that *my* d_size was smaller last time we checked.
38656a8d68b1SLars Ellenberg 		 *
38666a8d68b1SLars Ellenberg 		 * However, if he sends a zero current size,
38676a8d68b1SLars Ellenberg 		 * take his (user-capped or) backing disk size anyways.
38686a8d68b1SLars Ellenberg 		 */
38698fe39aacSPhilipp Reisner 		drbd_reconsider_max_bio_size(device, NULL);
38706a8d68b1SLars Ellenberg 		drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
3871b411b363SPhilipp Reisner 	}
3872b411b363SPhilipp Reisner 
3873b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3874b30ab791SAndreas Gruenbacher 		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3875b30ab791SAndreas Gruenbacher 			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
3876b411b363SPhilipp Reisner 			ldsc = 1;
3877b411b363SPhilipp Reisner 		}
3878b411b363SPhilipp Reisner 
3879b30ab791SAndreas Gruenbacher 		put_ldev(device);
3880b411b363SPhilipp Reisner 	}
3881b411b363SPhilipp Reisner 
3882b30ab791SAndreas Gruenbacher 	if (device->state.conn > C_WF_REPORT_PARAMS) {
3883b411b363SPhilipp Reisner 		if (be64_to_cpu(p->c_size) !=
3884b30ab791SAndreas Gruenbacher 		    drbd_get_capacity(device->this_bdev) || ldsc) {
3885b411b363SPhilipp Reisner 			/* we have different sizes, probably peer
3886b411b363SPhilipp Reisner 			 * needs to know my new size... */
388769a22773SAndreas Gruenbacher 			drbd_send_sizes(peer_device, 0, ddsf);
3888b411b363SPhilipp Reisner 		}
3889b30ab791SAndreas Gruenbacher 		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3890b30ab791SAndreas Gruenbacher 		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3891b30ab791SAndreas Gruenbacher 			if (device->state.pdsk >= D_INCONSISTENT &&
3892b30ab791SAndreas Gruenbacher 			    device->state.disk >= D_INCONSISTENT) {
3893e89b591cSPhilipp Reisner 				if (ddsf & DDSF_NO_RESYNC)
3894d0180171SAndreas Gruenbacher 					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
3895b411b363SPhilipp Reisner 				else
3896b30ab791SAndreas Gruenbacher 					resync_after_online_grow(device);
3897e89b591cSPhilipp Reisner 			} else
3898b30ab791SAndreas Gruenbacher 				set_bit(RESYNC_AFTER_NEG, &device->flags);
3899b411b363SPhilipp Reisner 		}
3900b411b363SPhilipp Reisner 	}
3901b411b363SPhilipp Reisner 
390282bc0194SAndreas Gruenbacher 	return 0;
3903b411b363SPhilipp Reisner }
3904b411b363SPhilipp Reisner 
3905bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
3906b411b363SPhilipp Reisner {
39079f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3908b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3909e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
3910b411b363SPhilipp Reisner 	u64 *p_uuid;
391162b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
3912b411b363SPhilipp Reisner 
39139f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
39149f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3915bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
39169f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
39174a76b161SAndreas Gruenbacher 
3918b411b363SPhilipp Reisner 	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3919063eacf8SJing Wang 	if (!p_uuid) {
3920d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc of p_uuid failed\n");
3921063eacf8SJing Wang 		return false;
3922063eacf8SJing Wang 	}
3923b411b363SPhilipp Reisner 
3924b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3925b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
3926b411b363SPhilipp Reisner 
3927b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
3928b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
3929b411b363SPhilipp Reisner 
3930b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED &&
3931b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
3932b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
3933b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3934d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
3935b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
39369f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
393782bc0194SAndreas Gruenbacher 		return -EIO;
3938b411b363SPhilipp Reisner 	}
3939b411b363SPhilipp Reisner 
3940b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3941b411b363SPhilipp Reisner 		int skip_initial_sync =
3942b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
39439f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
3944b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3945b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
3946b411b363SPhilipp Reisner 		if (skip_initial_sync) {
3947d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
3948b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
394920ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
395020ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
3951b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3952b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
3953b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3954b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
3955b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
395662b0da3aSLars Ellenberg 			updated_uuids = 1;
3957b411b363SPhilipp Reisner 		}
3958b30ab791SAndreas Gruenbacher 		put_ldev(device);
3959b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
3960b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
396118a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
396218a50fa2SPhilipp Reisner 		   for me. */
3963b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
3964b411b363SPhilipp Reisner 	}
3965b411b363SPhilipp Reisner 
3966b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
3967b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
3968b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
3969b411b363SPhilipp Reisner 	   new disk state... */
3970b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
3971b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
3972b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3973b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
397462b0da3aSLars Ellenberg 
397562b0da3aSLars Ellenberg 	if (updated_uuids)
3976b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
3977b411b363SPhilipp Reisner 
397882bc0194SAndreas Gruenbacher 	return 0;
3979b411b363SPhilipp Reisner }
3980b411b363SPhilipp Reisner 
3981b411b363SPhilipp Reisner /**
3982b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
3983b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
3984b411b363SPhilipp Reisner  */
3985b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
3986b411b363SPhilipp Reisner {
3987b411b363SPhilipp Reisner 	union drbd_state ms;
3988b411b363SPhilipp Reisner 
3989b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
3990369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
3991b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
3992b411b363SPhilipp Reisner 
3993b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3994b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3995b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3996b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
3997b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
3998b411b363SPhilipp Reisner 	};
3999b411b363SPhilipp Reisner 
4000b411b363SPhilipp Reisner 	ms.i = ps.i;
4001b411b363SPhilipp Reisner 
4002b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4003b411b363SPhilipp Reisner 	ms.peer = ps.role;
4004b411b363SPhilipp Reisner 	ms.role = ps.peer;
4005b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4006b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4007b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4008b411b363SPhilipp Reisner 
4009b411b363SPhilipp Reisner 	return ms;
4010b411b363SPhilipp Reisner }
4011b411b363SPhilipp Reisner 
4012bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4013b411b363SPhilipp Reisner {
40149f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4015b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4016e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4017b411b363SPhilipp Reisner 	union drbd_state mask, val;
4018bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4019b411b363SPhilipp Reisner 
40209f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
40219f4fe9adSAndreas Gruenbacher 	if (!peer_device)
40224a76b161SAndreas Gruenbacher 		return -EIO;
40239f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
40244a76b161SAndreas Gruenbacher 
4025b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4026b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4027b411b363SPhilipp Reisner 
40289f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4029b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
403069a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
403182bc0194SAndreas Gruenbacher 		return 0;
4032b411b363SPhilipp Reisner 	}
4033b411b363SPhilipp Reisner 
4034b411b363SPhilipp Reisner 	mask = convert_state(mask);
4035b411b363SPhilipp Reisner 	val = convert_state(val);
4036b411b363SPhilipp Reisner 
4037b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
403869a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4039047cd4a6SPhilipp Reisner 
4040b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4041b411b363SPhilipp Reisner 
404282bc0194SAndreas Gruenbacher 	return 0;
4043b411b363SPhilipp Reisner }
4044b411b363SPhilipp Reisner 
4045bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4046b411b363SPhilipp Reisner {
4047e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4048dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4049dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4050dfafcc8aSPhilipp Reisner 
4051dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4052dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4053dfafcc8aSPhilipp Reisner 
4054bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4055bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4056bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
405782bc0194SAndreas Gruenbacher 		return 0;
4058dfafcc8aSPhilipp Reisner 	}
4059dfafcc8aSPhilipp Reisner 
4060dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4061dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4062dfafcc8aSPhilipp Reisner 
4063bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4064bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4065dfafcc8aSPhilipp Reisner 
406682bc0194SAndreas Gruenbacher 	return 0;
4067dfafcc8aSPhilipp Reisner }
4068dfafcc8aSPhilipp Reisner 
/*
 * receive_state() - Process a P_STATE packet announcing the peer's state.
 *
 * Derives our new state (connection, peer role, peer disk) from the
 * peer's view, possibly runs the resync handshake, and commits the
 * result under req_lock.  Returns 0 on success, -ECONNRESET if the
 * connection is already being torn down, or -EIO if it must be dropped.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still negotiating its disk attach, guess its
	 * effective disk state from the inconsistent flag in its UUIDs. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	/* Snapshot our own state; re-taken via "goto retry" below if it
	 * changed while we ran unlocked. */
	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* Decide whether this state exchange should start a resync. */
	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from the handshake means "no common ancestry" —
		 * try to recover, otherwise drop the connection. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Commit: re-check that our snapshot is still current, then apply. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4246b411b363SPhilipp Reisner 
4247bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4248b411b363SPhilipp Reisner {
42499f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4250b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4251e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
42524a76b161SAndreas Gruenbacher 
42539f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42549f4fe9adSAndreas Gruenbacher 	if (!peer_device)
42554a76b161SAndreas Gruenbacher 		return -EIO;
42569f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4257b411b363SPhilipp Reisner 
4258b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4259b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4260b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4261b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4262b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4263b411b363SPhilipp Reisner 
42640b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4265b411b363SPhilipp Reisner 
4266b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4267b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4268b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4269b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4270b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4271b411b363SPhilipp Reisner 
4272b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4273b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4274b411b363SPhilipp Reisner 
4275b30ab791SAndreas Gruenbacher 		put_ldev(device);
4276b411b363SPhilipp Reisner 	} else
4277d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4278b411b363SPhilipp Reisner 
427982bc0194SAndreas Gruenbacher 	return 0;
4280b411b363SPhilipp Reisner }
4281b411b363SPhilipp Reisner 
42822c46407dSAndreas Gruenbacher /**
42832c46407dSAndreas Gruenbacher  * receive_bitmap_plain
42842c46407dSAndreas Gruenbacher  *
42852c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
42862c46407dSAndreas Gruenbacher  * code upon failure.
42872c46407dSAndreas Gruenbacher  */
42882c46407dSAndreas Gruenbacher static int
428969a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4290e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4291b411b363SPhilipp Reisner {
429250d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
429369a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4294e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
429550d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4296e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
42972c46407dSAndreas Gruenbacher 	int err;
4298b411b363SPhilipp Reisner 
429950d0b1adSAndreas Gruenbacher 	if (want != size) {
430069a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
43012c46407dSAndreas Gruenbacher 		return -EIO;
4302b411b363SPhilipp Reisner 	}
4303b411b363SPhilipp Reisner 	if (want == 0)
43042c46407dSAndreas Gruenbacher 		return 0;
430569a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
430682bc0194SAndreas Gruenbacher 	if (err)
43072c46407dSAndreas Gruenbacher 		return err;
4308b411b363SPhilipp Reisner 
430969a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4310b411b363SPhilipp Reisner 
4311b411b363SPhilipp Reisner 	c->word_offset += num_words;
4312b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4313b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4314b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4315b411b363SPhilipp Reisner 
43162c46407dSAndreas Gruenbacher 	return 1;
4317b411b363SPhilipp Reisner }
4318b411b363SPhilipp Reisner 
4319a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4320a02d1240SAndreas Gruenbacher {
4321a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4322a02d1240SAndreas Gruenbacher }
4323a02d1240SAndreas Gruenbacher 
4324a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4325a02d1240SAndreas Gruenbacher {
4326a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4327a02d1240SAndreas Gruenbacher }
4328a02d1240SAndreas Gruenbacher 
4329a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4330a02d1240SAndreas Gruenbacher {
4331a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4332a02d1240SAndreas Gruenbacher }
4333a02d1240SAndreas Gruenbacher 
/**
 * recv_bm_rle_bits
 *
 * Decode the payload of one P_COMPRESSED_BITMAP packet: a bit stream of
 * variable-length-integer (VLI) encoded run lengths describing alternating
 * runs of clear/set bits, and apply the "set" runs to our bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* next chunk of the input bit stream */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* first bit of the current run */
	unsigned long e;			/* last bit (inclusive) of a "set" run */
	int toggle = dcbp_get_start(p);	/* non-zero: current run is a "set" run */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime look_ahead with (up to) 64 bits from the stream */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		/* decode one VLI run length from the front of look_ahead */
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			/* the decoded code claims more bits than the stream held */
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill look_ahead back up to 64 bits from the stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 1: more packets expected; 0: the whole bitmap has been received */
	return (s != c->bm_bits);
}
4402b411b363SPhilipp Reisner 
44032c46407dSAndreas Gruenbacher /**
44042c46407dSAndreas Gruenbacher  * decode_bitmap_c
44052c46407dSAndreas Gruenbacher  *
44062c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
44072c46407dSAndreas Gruenbacher  * code upon failure.
44082c46407dSAndreas Gruenbacher  */
44092c46407dSAndreas Gruenbacher static int
441069a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4411b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4412c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4413c6d25cfeSPhilipp Reisner 		unsigned int len)
4414b411b363SPhilipp Reisner {
4415a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
441669a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4417b411b363SPhilipp Reisner 
4418b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4419b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4420b411b363SPhilipp Reisner 	 * during all our tests. */
4421b411b363SPhilipp Reisner 
442269a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
442369a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
44242c46407dSAndreas Gruenbacher 	return -EIO;
4425b411b363SPhilipp Reisner }
4426b411b363SPhilipp Reisner 
4427b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4428b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4429b411b363SPhilipp Reisner {
4430b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4431a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
443250d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
443350d0b1adSAndreas Gruenbacher 	unsigned int plain =
443450d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
443550d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
443650d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
443750d0b1adSAndreas Gruenbacher 	unsigned int r;
4438b411b363SPhilipp Reisner 
4439b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4440b411b363SPhilipp Reisner 	if (total == 0)
4441b411b363SPhilipp Reisner 		return;
4442b411b363SPhilipp Reisner 
4443b411b363SPhilipp Reisner 	/* don't report if not compressed */
4444b411b363SPhilipp Reisner 	if (total >= plain)
4445b411b363SPhilipp Reisner 		return;
4446b411b363SPhilipp Reisner 
4447b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4448b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4449b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4450b411b363SPhilipp Reisner 
4451b411b363SPhilipp Reisner 	if (r > 1000)
4452b411b363SPhilipp Reisner 		r = 1000;
4453b411b363SPhilipp Reisner 
4454b411b363SPhilipp Reisner 	r = 1000 - r;
4455d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4456b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4457b411b363SPhilipp Reisner 			direction,
4458b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4459b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4460b411b363SPhilipp Reisner 			total, r/10, r % 10);
4461b411b363SPhilipp Reisner }
4462b411b363SPhilipp Reisner 
4463b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4464b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4465b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4466b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4467b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4468b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4469b411b363SPhilipp Reisner 
4470b411b363SPhilipp Reisner    returns 0 on failure, 1 if we successfully received it. */
4471bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4472b411b363SPhilipp Reisner {
44739f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4474b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4475b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
44762c46407dSAndreas Gruenbacher 	int err;
44774a76b161SAndreas Gruenbacher 
44789f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
44799f4fe9adSAndreas Gruenbacher 	if (!peer_device)
44804a76b161SAndreas Gruenbacher 		return -EIO;
44819f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4482b411b363SPhilipp Reisner 
4483b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
448420ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
448520ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4486b411b363SPhilipp Reisner 
4487b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4488b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4489b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4490b411b363SPhilipp Reisner 	};
4491b411b363SPhilipp Reisner 
44922c46407dSAndreas Gruenbacher 	for(;;) {
4493e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
449469a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4495e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4496b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4497b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4498e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4499b411b363SPhilipp Reisner 
4500bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4501d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
450282bc0194SAndreas Gruenbacher 				err = -EIO;
4503b411b363SPhilipp Reisner 				goto out;
4504b411b363SPhilipp Reisner 			}
4505e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4506d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
450782bc0194SAndreas Gruenbacher 				err = -EIO;
450878fcbdaeSAndreas Gruenbacher 				goto out;
4509b411b363SPhilipp Reisner 			}
45109f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4511e658983aSAndreas Gruenbacher 			if (err)
4512e658983aSAndreas Gruenbacher 			       goto out;
451369a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4514b411b363SPhilipp Reisner 		} else {
4515d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
451682bc0194SAndreas Gruenbacher 			err = -EIO;
4517b411b363SPhilipp Reisner 			goto out;
4518b411b363SPhilipp Reisner 		}
4519b411b363SPhilipp Reisner 
4520e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4521bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4522b411b363SPhilipp Reisner 
45232c46407dSAndreas Gruenbacher 		if (err <= 0) {
45242c46407dSAndreas Gruenbacher 			if (err < 0)
45252c46407dSAndreas Gruenbacher 				goto out;
4526b411b363SPhilipp Reisner 			break;
45272c46407dSAndreas Gruenbacher 		}
45289f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
452982bc0194SAndreas Gruenbacher 		if (err)
4530b411b363SPhilipp Reisner 			goto out;
45312c46407dSAndreas Gruenbacher 	}
4532b411b363SPhilipp Reisner 
4533b30ab791SAndreas Gruenbacher 	INFO_bm_xfer_stats(device, "receive", &c);
4534b411b363SPhilipp Reisner 
4535b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4536de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4537de1f8e4aSAndreas Gruenbacher 
4538b30ab791SAndreas Gruenbacher 		err = drbd_send_bitmap(device);
453982bc0194SAndreas Gruenbacher 		if (err)
4540b411b363SPhilipp Reisner 			goto out;
4541b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4542b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
45430b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4544b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4545b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4546b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4547d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4548b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4549b411b363SPhilipp Reisner 	}
455082bc0194SAndreas Gruenbacher 	err = 0;
4551b411b363SPhilipp Reisner 
4552b411b363SPhilipp Reisner  out:
4553b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4554b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4555b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
455682bc0194SAndreas Gruenbacher 	return err;
4557b411b363SPhilipp Reisner }
4558b411b363SPhilipp Reisner 
4559bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4560b411b363SPhilipp Reisner {
45611ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4562e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4563b411b363SPhilipp Reisner 
4564bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4565b411b363SPhilipp Reisner }
4566b411b363SPhilipp Reisner 
4567bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4568b411b363SPhilipp Reisner {
4569b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4570b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4571bde89a9eSAndreas Gruenbacher 	drbd_tcp_quickack(connection->data.socket);
4572b411b363SPhilipp Reisner 
457382bc0194SAndreas Gruenbacher 	return 0;
4574b411b363SPhilipp Reisner }
4575b411b363SPhilipp Reisner 
4576bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
457773a01a18SPhilipp Reisner {
45789f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4579b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4580e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
45814a76b161SAndreas Gruenbacher 
45829f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
45839f4fe9adSAndreas Gruenbacher 	if (!peer_device)
45844a76b161SAndreas Gruenbacher 		return -EIO;
45859f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
458673a01a18SPhilipp Reisner 
4587b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4588f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4589f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4590f735e363SLars Ellenberg 	case C_BEHIND:
4591f735e363SLars Ellenberg 			break;
4592f735e363SLars Ellenberg 	default:
4593d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4594b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4595f735e363SLars Ellenberg 	}
4596f735e363SLars Ellenberg 
4597b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
459873a01a18SPhilipp Reisner 
459982bc0194SAndreas Gruenbacher 	return 0;
460073a01a18SPhilipp Reisner }
460173a01a18SPhilipp Reisner 
/* One entry of the data-socket packet dispatch table (drbd_cmd_handler). */
struct data_cmd {
	int expect_payload;	/* may the packet carry more than pkt_size bytes? */
	size_t pkt_size;	/* (sub) header size received before dispatching */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
4607b411b363SPhilipp Reisner 
/* Dispatch table for packets received on the data socket, indexed by packet
 * command code; drbdd() rejects any code without an entry here. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
463502918be2SPhilipp Reisner 
4636bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
4637b411b363SPhilipp Reisner {
463877351055SPhilipp Reisner 	struct packet_info pi;
463902918be2SPhilipp Reisner 	size_t shs; /* sub header size */
464082bc0194SAndreas Gruenbacher 	int err;
4641b411b363SPhilipp Reisner 
4642bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
4643deebe195SAndreas Gruenbacher 		struct data_cmd *cmd;
4644deebe195SAndreas Gruenbacher 
4645bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
4646944410e9SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header);
4647bde89a9eSAndreas Gruenbacher 		if (drbd_recv_header(connection, &pi))
464802918be2SPhilipp Reisner 			goto err_out;
464902918be2SPhilipp Reisner 
4650deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
46514a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
46521ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
46532fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
465402918be2SPhilipp Reisner 			goto err_out;
46550b33a916SLars Ellenberg 		}
4656b411b363SPhilipp Reisner 
4657e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
4658e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
46591ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
46602fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
4661c13f7e1aSLars Ellenberg 			goto err_out;
4662c13f7e1aSLars Ellenberg 		}
4663c13f7e1aSLars Ellenberg 
4664c13f7e1aSLars Ellenberg 		if (shs) {
4665944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
4666bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
4667a5c31904SAndreas Gruenbacher 			if (err)
466802918be2SPhilipp Reisner 				goto err_out;
4669e2857216SAndreas Gruenbacher 			pi.size -= shs;
4670b411b363SPhilipp Reisner 		}
467102918be2SPhilipp Reisner 
4672944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
4673bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
46744a76b161SAndreas Gruenbacher 		if (err) {
46751ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
46769f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
467702918be2SPhilipp Reisner 			goto err_out;
467802918be2SPhilipp Reisner 		}
467902918be2SPhilipp Reisner 	}
468082bc0194SAndreas Gruenbacher 	return;
468102918be2SPhilipp Reisner 
468202918be2SPhilipp Reisner     err_out:
4683bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
4684b411b363SPhilipp Reisner }
4685b411b363SPhilipp Reisner 
/* Tear down a lost connection: stop the ack machinery, close the sockets,
 * run per-volume cleanup (drbd_disconnected) for every peer device, reset
 * epoch bookkeeping, possibly fence the peer, and finally move the
 * connection state to C_UNCONNECTED (or C_STANDALONE if disconnecting). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	/* nothing to tear down if we never left standalone */
	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* Per-volume cleanup.  Take a kref on each device so it stays valid
	 * while we drop the RCU read lock around the (sleeping) cleanup. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* if we were primary and the peer's disk state is unknown or worse,
	 * try to fence/outdate the peer asynchronously */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4742360cc740SPhilipp Reisner 
/* Per-volume cleanup after connection loss: wait for in-flight peer
 * requests, cancel resync activity, flush the sender work queue, clear the
 * transfer log, and verify the peer-request lists are empty again.
 * Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* cancel a pending resync timer, then run its handler once by hand */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* forget everything we knew about the peer's UUIDs */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	/* by now, all peer request lists must have drained */
	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4826b411b363SPhilipp Reisner 
4827b411b363SPhilipp Reisner /*
4828b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4829b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
4830b411b363SPhilipp Reisner  *
4831b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
4832b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
4833b411b363SPhilipp Reisner  *
4834b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
4835b411b363SPhilipp Reisner  */
4836bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
4837b411b363SPhilipp Reisner {
48389f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
48399f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
4840b411b363SPhilipp Reisner 
4841bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
4842bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
48439f5bdc33SAndreas Gruenbacher 	if (!p)
4844e8d17b01SAndreas Gruenbacher 		return -EIO;
4845b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
4846b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4847b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
484820c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
4849bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
4850b411b363SPhilipp Reisner }
4851b411b363SPhilipp Reisner 
4852b411b363SPhilipp Reisner /*
4853b411b363SPhilipp Reisner  * return values:
4854b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
4855b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
4856b411b363SPhilipp Reisner  *  -1 peer talks different language,
4857b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
4858b411b363SPhilipp Reisner  */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Both sides send their features packet first, then read the peer's. */
	err = drbd_send_features(connection);
	if (err)
		return 0;	/* network trouble: caller retries the connect */

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;	/* network trouble: caller retries the connect */

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;	/* protocol violation: go standalone */
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* byteswap the payload in place before evaluating it */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;	/* peer sent no max; presumably an old version -- treat min as the only version */

	/* version ranges must overlap, otherwise we cannot talk at all */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support, and on the
	 * intersection of the advertised feature flags */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4919b411b363SPhilipp Reisner 
4920b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac
 * authentication cannot work, so always fail permanently (-1) and tell
 * the admin how to resolve it. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* fix grammar in the user-visible error message: "build" -> "built" */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4927b411b363SPhilipp Reisner #else
4928b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
4929b10d96cbSJohannes Thoma 
4930b10d96cbSJohannes Thoma /* Return value:
4931b10d96cbSJohannes Thoma 	1 - auth succeeded,
4932b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
4933b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
4934b10d96cbSJohannes Thoma */
4935b10d96cbSJohannes Thoma 
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;		/* peer's HMAC over our challenge... actually ours over theirs; see below */
	char *right_response = NULL;	/* HMAC we expect the peer to send for our challenge */
	char *peers_ch = NULL;		/* challenge received from the peer */
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may be replaced
	 * concurrently by configuration changes. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc->tfm = connection->cram_hmac_tfm;
	desc->flags = 0;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;	/* permanent failure: do not retry */
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our challenge to the peer. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;		/* network error: retry connect */
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Receive the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Only accept challenges within a sane size window. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer echoing our own challenge back would let it replay our
	 * response as its own (reflection); refuse that. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Compute our HMAC over the peer's challenge and send it back. */
	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer */
	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Compute what the peer should have answered for our challenge. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* rv = 1 on match (auth success), otherwise turn it into -1 */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	/* wipe the on-stack hash state that saw key material */
	shash_desc_zero(desc);

	return rv;
}
5105b411b363SPhilipp Reisner #endif
5106b411b363SPhilipp Reisner 
51078fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5108b411b363SPhilipp Reisner {
5109bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5110b411b363SPhilipp Reisner 	int h;
5111b411b363SPhilipp Reisner 
51121ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5113b411b363SPhilipp Reisner 
5114b411b363SPhilipp Reisner 	do {
5115bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5116b411b363SPhilipp Reisner 		if (h == 0) {
5117bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
511820ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5119b411b363SPhilipp Reisner 		}
5120b411b363SPhilipp Reisner 		if (h == -1) {
51211ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5122bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5123b411b363SPhilipp Reisner 		}
5124b411b363SPhilipp Reisner 	} while (h == 0);
5125b411b363SPhilipp Reisner 
512691fd4dadSPhilipp Reisner 	if (h > 0)
5127bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5128b411b363SPhilipp Reisner 
5129bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5130b411b363SPhilipp Reisner 
51311ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5132b411b363SPhilipp Reisner 	return 0;
5133b411b363SPhilipp Reisner }
5134b411b363SPhilipp Reisner 
5135b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5136b411b363SPhilipp Reisner 
5137bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5138b411b363SPhilipp Reisner {
5139e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5140b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5141b411b363SPhilipp Reisner 
5142b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5143bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5144b411b363SPhilipp Reisner 	} else {
5145bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
51461ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5147fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5148fc3b10a4SPhilipp Reisner 	}
5149bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5150e4f78edeSPhilipp Reisner 
51512735a594SAndreas Gruenbacher 	return 0;
5152fc3b10a4SPhilipp Reisner }
5153e4f78edeSPhilipp Reisner 
5154bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5155e4f78edeSPhilipp Reisner {
51569f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5157b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5158e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5159e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5160e4f78edeSPhilipp Reisner 
51619f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
51629f4fe9adSAndreas Gruenbacher 	if (!peer_device)
51632735a594SAndreas Gruenbacher 		return -EIO;
51649f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
51651952e916SAndreas Gruenbacher 
5166bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
51670b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5168bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
51694d0fc3fdSPhilipp Reisner 	}
51704d0fc3fdSPhilipp Reisner 
5171e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5172b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5173e4f78edeSPhilipp Reisner 	} else {
5174b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5175d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5176b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5177b411b363SPhilipp Reisner 	}
5178b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5179b411b363SPhilipp Reisner 
51802735a594SAndreas Gruenbacher 	return 0;
5181b411b363SPhilipp Reisner }
5182b411b363SPhilipp Reisner 
/* P_PING received: the peer is probing liveness; answer with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}
5188b411b363SPhilipp Reisner 
/* P_PING_ACK received: the peer is alive; restore the normal receive
 * timeout on the meta socket and wake anyone waiting for the ack. */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	/* only the first ack since the flag was cleared needs to wake waiters */
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}
5198b411b363SPhilipp Reisner 
/* Peer reports that a resync block turned out to be already in sync
 * (checksum-based resync): mark it in sync locally, account the
 * checksum hit, and credit the block to the resync-in counters. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* this packet is only expected from peers speaking protocol 89+ */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* blksize >> 9: bytes to 512-byte sectors */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5228b411b363SPhilipp Reisner 
/* Look up the request identified by (id, sector) in the given tree and
 * apply the request-state transition 'what' to it, all under the
 * resource req_lock.  If the transition completes the master bio, the
 * completion runs after the lock is dropped.
 * Returns 0 on success, -EIO when the request was not found
 * (missing_ok is forwarded to find_request; presumably it controls
 * whether a miss is logged -- the return here is -EIO either way). */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* complete outside the spinlock */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5250b411b363SPhilipp Reisner 
/* Positive acknowledgements for writes (and related events): map the
 * packet type to a request-state event and apply it to the matching
 * entry in the write_requests tree.  Resync writes (block_id ==
 * ID_SYNCER) have no request object; they only update the bitmap. */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* ack for a resync write, not for an application request */
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();	/* dispatch table should never route other cmds here */
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5296b411b363SPhilipp Reisner 
/* Negative acknowledgement: the peer failed to apply a write.  For
 * resync writes just record the failure; for application writes apply
 * NEG_ACKED to the request, and if the request is already gone, mark
 * the area out of sync so it gets resynced later. */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* failed resync write: no request object to update */
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	/* missing_ok = true: the request may legitimately be gone already */
	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5332b411b363SPhilipp Reisner 
/* Negative reply to a data (read) request: the peer could not serve the
 * read.  Apply NEG_ACKED to the request in the read_requests tree. */
static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
	    (unsigned long long)sector, be32_to_cpu(p->blksize));

	/* missing_ok = false: a read request must still be around */
	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}
5354b411b363SPhilipp Reisner 
/* Negative reply to a resync data request (P_NEG_RS_DREPLY) or a resync
 * request cancel (P_RS_CANCEL): complete the resync I/O for the area and,
 * for the failure case, record the failed range. */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through - P_RS_CANCEL needs no extra work */
		case P_RS_CANCEL:
			break;
		default:
			BUG();	/* dispatch table should never route other cmds here */
		}
		put_ldev(device);
	}

	return 0;
}
5390b411b363SPhilipp Reisner 
/* Peer acknowledged a whole barrier/epoch: release the covered requests
 * from the transfer log.  Additionally, any volume that is in Ahead mode
 * with no more application I/O in flight gets its resync timer armed so
 * it can become a SyncSource again. */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* RCU protects the peer_devices idr traversal */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			/* give I/O a one-second grace period before resync */
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5414b411b363SPhilipp Reisner 
5415bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
5416b411b363SPhilipp Reisner {
54179f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5418b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5419e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
542084b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
5421b411b363SPhilipp Reisner 	sector_t sector;
5422b411b363SPhilipp Reisner 	int size;
5423b411b363SPhilipp Reisner 
54249f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
54259f4fe9adSAndreas Gruenbacher 	if (!peer_device)
54262735a594SAndreas Gruenbacher 		return -EIO;
54279f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
54281952e916SAndreas Gruenbacher 
5429b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5430b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5431b411b363SPhilipp Reisner 
543269a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5433b411b363SPhilipp Reisner 
5434b411b363SPhilipp Reisner 	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
5435b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
5436b411b363SPhilipp Reisner 	else
5437b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
5438b411b363SPhilipp Reisner 
5439b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
54402735a594SAndreas Gruenbacher 		return 0;
54411d53f09eSLars Ellenberg 
5442b30ab791SAndreas Gruenbacher 	drbd_rs_complete_io(device, sector);
5443b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5444b411b363SPhilipp Reisner 
5445b30ab791SAndreas Gruenbacher 	--device->ov_left;
5446ea5442afSLars Ellenberg 
5447ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
5448b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
5449b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
5450ea5442afSLars Ellenberg 
5451b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0) {
545284b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(*dw), GFP_NOIO);
545384b8c06bSAndreas Gruenbacher 		if (dw) {
545484b8c06bSAndreas Gruenbacher 			dw->w.cb = w_ov_finished;
545584b8c06bSAndreas Gruenbacher 			dw->device = device;
545684b8c06bSAndreas Gruenbacher 			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
5457b411b363SPhilipp Reisner 		} else {
545884b8c06bSAndreas Gruenbacher 			drbd_err(device, "kmalloc(dw) failed.");
5459b30ab791SAndreas Gruenbacher 			ov_out_of_sync_print(device);
5460b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
5461b411b363SPhilipp Reisner 		}
5462b411b363SPhilipp Reisner 	}
5463b30ab791SAndreas Gruenbacher 	put_ldev(device);
54642735a594SAndreas Gruenbacher 	return 0;
5465b411b363SPhilipp Reisner }
5466b411b363SPhilipp Reisner 
5467bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
54680ced55a3SPhilipp Reisner {
54692735a594SAndreas Gruenbacher 	return 0;
54700ced55a3SPhilipp Reisner }
54710ced55a3SPhilipp Reisner 
5472668700b4SPhilipp Reisner struct meta_sock_cmd {
5473b411b363SPhilipp Reisner 	size_t pkt_size;
5474bde89a9eSAndreas Gruenbacher 	int (*fn)(struct drbd_connection *connection, struct packet_info *);
5475b411b363SPhilipp Reisner };
5476b411b363SPhilipp Reisner 
5477668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5478668700b4SPhilipp Reisner {
5479668700b4SPhilipp Reisner 	long t;
5480668700b4SPhilipp Reisner 	struct net_conf *nc;
5481668700b4SPhilipp Reisner 
5482668700b4SPhilipp Reisner 	rcu_read_lock();
5483668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5484668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5485668700b4SPhilipp Reisner 	rcu_read_unlock();
5486668700b4SPhilipp Reisner 
5487668700b4SPhilipp Reisner 	t *= HZ;
5488668700b4SPhilipp Reisner 	if (ping_timeout)
5489668700b4SPhilipp Reisner 		t /= 10;
5490668700b4SPhilipp Reisner 
5491668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5492668700b4SPhilipp Reisner }
5493668700b4SPhilipp Reisner 
5494668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5495668700b4SPhilipp Reisner {
5496668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5497668700b4SPhilipp Reisner }
5498668700b4SPhilipp Reisner 
5499668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5500668700b4SPhilipp Reisner {
5501668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5502668700b4SPhilipp Reisner }
5503668700b4SPhilipp Reisner 
5504668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = {
5505e658983aSAndreas Gruenbacher 	[P_PING]	    = { 0, got_Ping },
5506e658983aSAndreas Gruenbacher 	[P_PING_ACK]	    = { 0, got_PingAck },
5507b411b363SPhilipp Reisner 	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5508b411b363SPhilipp Reisner 	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
5509b411b363SPhilipp Reisner 	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
5510d4dabbe2SLars Ellenberg 	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
5511b411b363SPhilipp Reisner 	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
5512b411b363SPhilipp Reisner 	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
5513b411b363SPhilipp Reisner 	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
5514b411b363SPhilipp Reisner 	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
5515b411b363SPhilipp Reisner 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
5516b411b363SPhilipp Reisner 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5517b411b363SPhilipp Reisner 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
551802918be2SPhilipp Reisner 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
5519d612d309SPhilipp Reisner 	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
55201952e916SAndreas Gruenbacher 	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
55211952e916SAndreas Gruenbacher 	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
5522b411b363SPhilipp Reisner };
5523b411b363SPhilipp Reisner 
55241c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5525b411b363SPhilipp Reisner {
5526bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5527668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
552877351055SPhilipp Reisner 	struct packet_info pi;
5529668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
5530257d0af6SPhilipp Reisner 	int rv;
5531bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
5532b411b363SPhilipp Reisner 	int received = 0;
5533bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
553452b061a4SAndreas Gruenbacher 	int expect   = header_size;
553544ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
55363990e04dSPhilipp Reisner 	struct sched_param param = { .sched_priority = 2 };
5537b411b363SPhilipp Reisner 
55383990e04dSPhilipp Reisner 	rv = sched_setscheduler(current, SCHED_RR, &param);
55393990e04dSPhilipp Reisner 	if (rv < 0)
5540668700b4SPhilipp Reisner 		drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
5541b411b363SPhilipp Reisner 
5542e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
554380822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
554444ed167dSPhilipp Reisner 
5545668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
554644ed167dSPhilipp Reisner 
5547bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5548bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
55491ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
5550841ce241SAndreas Gruenbacher 				goto reconnect;
5551841ce241SAndreas Gruenbacher 			}
5552668700b4SPhilipp Reisner 			set_ping_timeout(connection);
555344ed167dSPhilipp Reisner 			ping_timeout_active = true;
5554b411b363SPhilipp Reisner 		}
5555b411b363SPhilipp Reisner 
5556668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
5557bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5558b411b363SPhilipp Reisner 
5559b411b363SPhilipp Reisner 		/* Note:
5560b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
5561b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
5562b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
5563b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
5564b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
5565b411b363SPhilipp Reisner 		 * rv == expected: full header or command
5566b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
5567b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
5568b411b363SPhilipp Reisner 		 */
5569b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
5570b411b363SPhilipp Reisner 			received += rv;
5571b411b363SPhilipp Reisner 			buf	 += rv;
5572b411b363SPhilipp Reisner 		} else if (rv == 0) {
5573bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
5574b66623e3SPhilipp Reisner 				long t;
5575b66623e3SPhilipp Reisner 				rcu_read_lock();
5576bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
5577b66623e3SPhilipp Reisner 				rcu_read_unlock();
5578b66623e3SPhilipp Reisner 
5579bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
5580bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
5581b66623e3SPhilipp Reisner 						       t);
5582599377acSPhilipp Reisner 				if (t)
5583599377acSPhilipp Reisner 					break;
5584599377acSPhilipp Reisner 			}
55851ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
5586b411b363SPhilipp Reisner 			goto reconnect;
5587b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
5588cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
5589cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
5590668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
5591cb6518cbSLars Ellenberg 				continue;
5592f36af18cSLars Ellenberg 			if (ping_timeout_active) {
55931ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
5594b411b363SPhilipp Reisner 				goto reconnect;
5595b411b363SPhilipp Reisner 			}
5596bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
5597b411b363SPhilipp Reisner 			continue;
5598b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
5599668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
5600668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
5601668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
5602668700b4SPhilipp Reisner 			flush_signals(current);
5603b411b363SPhilipp Reisner 			continue;
5604b411b363SPhilipp Reisner 		} else {
56051ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
5606b411b363SPhilipp Reisner 			goto reconnect;
5607b411b363SPhilipp Reisner 		}
5608b411b363SPhilipp Reisner 
5609b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
5610bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
5611b411b363SPhilipp Reisner 				goto reconnect;
5612668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
5613668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
56141ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
56152fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
5616b411b363SPhilipp Reisner 				goto disconnect;
5617b411b363SPhilipp Reisner 			}
5618e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
561952b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
56201ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
562177351055SPhilipp Reisner 					pi.cmd, pi.size);
5622b411b363SPhilipp Reisner 				goto reconnect;
5623b411b363SPhilipp Reisner 			}
5624257d0af6SPhilipp Reisner 		}
5625b411b363SPhilipp Reisner 		if (received == expect) {
56262735a594SAndreas Gruenbacher 			bool err;
5627a4fbda8eSPhilipp Reisner 
5628bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
56292735a594SAndreas Gruenbacher 			if (err) {
56301ec861ebSAndreas Gruenbacher 				drbd_err(connection, "%pf failed\n", cmd->fn);
5631b411b363SPhilipp Reisner 				goto reconnect;
56321952e916SAndreas Gruenbacher 			}
5633b411b363SPhilipp Reisner 
5634bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
5635f36af18cSLars Ellenberg 
5636668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
5637668700b4SPhilipp Reisner 				set_idle_timeout(connection);
563844ed167dSPhilipp Reisner 				ping_timeout_active = false;
563944ed167dSPhilipp Reisner 			}
5640b411b363SPhilipp Reisner 
5641bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
5642b411b363SPhilipp Reisner 			received = 0;
564352b061a4SAndreas Gruenbacher 			expect	 = header_size;
5644b411b363SPhilipp Reisner 			cmd	 = NULL;
5645b411b363SPhilipp Reisner 		}
5646b411b363SPhilipp Reisner 	}
5647b411b363SPhilipp Reisner 
5648b411b363SPhilipp Reisner 	if (0) {
5649b411b363SPhilipp Reisner reconnect:
5650bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5651bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
5652b411b363SPhilipp Reisner 	}
5653b411b363SPhilipp Reisner 	if (0) {
5654b411b363SPhilipp Reisner disconnect:
5655bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5656b411b363SPhilipp Reisner 	}
5657b411b363SPhilipp Reisner 
5658668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
5659b411b363SPhilipp Reisner 
5660b411b363SPhilipp Reisner 	return 0;
5661b411b363SPhilipp Reisner }
5662668700b4SPhilipp Reisner 
5663668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
5664668700b4SPhilipp Reisner {
5665668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
5666668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
5667668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
5668668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
5669668700b4SPhilipp Reisner 	struct net_conf *nc;
5670668700b4SPhilipp Reisner 	int tcp_cork, err;
5671668700b4SPhilipp Reisner 
5672668700b4SPhilipp Reisner 	rcu_read_lock();
5673668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5674668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
5675668700b4SPhilipp Reisner 	rcu_read_unlock();
5676668700b4SPhilipp Reisner 
5677668700b4SPhilipp Reisner 	if (tcp_cork)
5678668700b4SPhilipp Reisner 		drbd_tcp_cork(connection->meta.socket);
5679668700b4SPhilipp Reisner 
5680668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
5681668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
5682668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
5683668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
5684668700b4SPhilipp Reisner 
5685668700b4SPhilipp Reisner 	if (err) {
5686668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5687668700b4SPhilipp Reisner 		return;
5688668700b4SPhilipp Reisner 	}
5689668700b4SPhilipp Reisner 
5690668700b4SPhilipp Reisner 	if (tcp_cork)
5691668700b4SPhilipp Reisner 		drbd_tcp_uncork(connection->meta.socket);
5692668700b4SPhilipp Reisner 
5693668700b4SPhilipp Reisner 	return;
5694668700b4SPhilipp Reisner }
5695