193c68cc4SChristoph Böhmwalder // SPDX-License-Identifier: GPL-2.0-only
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #include <linux/unistd.h>
31b411b363SPhilipp Reisner #include <linux/vmalloc.h>
32b411b363SPhilipp Reisner #include <linux/random.h>
33b411b363SPhilipp Reisner #include <linux/string.h>
34b411b363SPhilipp Reisner #include <linux/scatterlist.h>
35c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
36b411b363SPhilipp Reisner #include "drbd_int.h"
37a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
38b411b363SPhilipp Reisner #include "drbd_req.h"
39b411b363SPhilipp Reisner #include "drbd_vli.h"
40b411b363SPhilipp Reisner 
41f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4220c68fdeSLars Ellenberg 
/* Decoded form of a received packet header, filled in by the receiver
 * and handed to the per-packet handlers. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type */
	unsigned int size;	/* payload size as announced in the header */
	unsigned int vnr;	/* presumably the volume number -- TODO confirm */
	void *data;		/* points at the received (header) buffer */
};
4977351055SPhilipp Reisner 
/* Return values of drbd_may_finish_epoch(), describing what happened
 * to the epoch object. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch not finished yet */
	FE_DESTROYED,	/* epoch was freed */
	FE_RECYCLED,	/* epoch object was reused */
};
55b411b363SPhilipp Reisner 
56bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
57bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
59a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
60bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
62b411b363SPhilipp Reisner 
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
65b411b363SPhilipp Reisner 
6645bb912bSLars Ellenberg /*
6745bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6845bb912bSLars Ellenberg  * page->private being our "next" pointer.
6945bb912bSLars Ellenberg  */
7045bb912bSLars Ellenberg 
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk forward until "page" is the n-th element; it becomes the
	 * tail of the chain we hand out.  "tmp" then is the first page we
	 * keep (or NULL, if the chain had exactly n pages). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
10545bb912bSLars Ellenberg 
10645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10745bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10845bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 * If @len is non-NULL, the chain length is stored there as well. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(next)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
12145bb912bSLars Ellenberg 
page_chain_free(struct page * page)12245bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12345bb912bSLars Ellenberg {
12445bb912bSLars Ellenberg 	struct page *tmp;
12545bb912bSLars Ellenberg 	int i = 0;
12645bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12745bb912bSLars Ellenberg 		put_page(page);
12845bb912bSLars Ellenberg 		++i;
12945bb912bSLars Ellenberg 	}
13045bb912bSLars Ellenberg 	return i;
13145bb912bSLars Ellenberg }
13245bb912bSLars Ellenberg 
page_chain_add(struct page ** head,struct page * chain_first,struct page * chain_last)13345bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13445bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13545bb912bSLars Ellenberg {
13645bb912bSLars Ellenberg #if 1
13745bb912bSLars Ellenberg 	struct page *tmp;
13845bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
13945bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
14045bb912bSLars Ellenberg #endif
14145bb912bSLars Ellenberg 
14245bb912bSLars Ellenberg 	/* add chain to head */
14345bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14445bb912bSLars Ellenberg 	*head = chain_first;
14545bb912bSLars Ellenberg }
14645bb912bSLars Ellenberg 
/* Try to get @number pages, first from our own pre-allocated pool,
 * falling back to alloc_page().  Returns the chain on success, NULL if
 * not all pages could be obtained right now (no partial results). */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* prepend the fresh page to the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		/* give the partial allocation to the global pool */
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
192b411b363SPhilipp Reisner 
reclaim_finished_net_peer_reqs(struct drbd_device * device,struct list_head * to_be_freed)193b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
194a990be46SAndreas Gruenbacher 					   struct list_head *to_be_freed)
195b411b363SPhilipp Reisner {
196a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *tmp;
197b411b363SPhilipp Reisner 
198b411b363SPhilipp Reisner 	/* The EEs are always appended to the end of the list. Since
199b411b363SPhilipp Reisner 	   they are sent in order over the wire, they have to finish
200b411b363SPhilipp Reisner 	   in order. As soon as we see the first not finished we can
201b411b363SPhilipp Reisner 	   stop to examine the list... */
202b411b363SPhilipp Reisner 
203a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
204045417f7SAndreas Gruenbacher 		if (drbd_peer_req_has_active_page(peer_req))
205b411b363SPhilipp Reisner 			break;
206a8cd15baSAndreas Gruenbacher 		list_move(&peer_req->w.list, to_be_freed);
207b411b363SPhilipp Reisner 	}
208b411b363SPhilipp Reisner }
209b411b363SPhilipp Reisner 
drbd_reclaim_net_peer_reqs(struct drbd_device * device)210668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
211b411b363SPhilipp Reisner {
212b411b363SPhilipp Reisner 	LIST_HEAD(reclaimed);
213db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
214b411b363SPhilipp Reisner 
2150500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
216b30ab791SAndreas Gruenbacher 	reclaim_finished_net_peer_reqs(device, &reclaimed);
2170500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
218a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
219b30ab791SAndreas Gruenbacher 		drbd_free_net_peer_req(device, peer_req);
220b411b363SPhilipp Reisner }
221b411b363SPhilipp Reisner 
/* Reclaim network-held peer requests on every volume of @connection. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* fast path: nothing lended to the network stack here */
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		/* Drop the RCU read lock while working on this device;
		 * the kref keeps the device alive meanwhile.  Re-acquire
		 * it before continuing the idr walk. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
241668700b4SPhilipp Reisner 
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device; its connection's net_conf supplies max-buffers
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;	/* effectively no limit if unconfigured */
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	/* slow path: wait for pages to be freed elsewhere */
	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* Timed out without a wakeup: stop throttling on
		 * max-buffers (see comment above) and just allocate. */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
312b411b363SPhilipp Reisner 
313c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
3140500813fSAndreas Gruenbacher  * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31545bb912bSLars Ellenberg  * Either links the page chain back to the global pool,
31645bb912bSLars Ellenberg  * or returns all pages to the system. */
drbd_free_pages(struct drbd_device * device,struct page * page,int is_net)317b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
318b411b363SPhilipp Reisner {
319b30ab791SAndreas Gruenbacher 	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
320b411b363SPhilipp Reisner 	int i;
321435f0740SLars Ellenberg 
322a73ff323SLars Ellenberg 	if (page == NULL)
323a73ff323SLars Ellenberg 		return;
324a73ff323SLars Ellenberg 
325183ece30SRoland Kammerer 	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32645bb912bSLars Ellenberg 		i = page_chain_free(page);
32745bb912bSLars Ellenberg 	else {
32845bb912bSLars Ellenberg 		struct page *tmp;
32945bb912bSLars Ellenberg 		tmp = page_chain_tail(page, &i);
330b411b363SPhilipp Reisner 		spin_lock(&drbd_pp_lock);
33145bb912bSLars Ellenberg 		page_chain_add(&drbd_pp_pool, page, tmp);
33245bb912bSLars Ellenberg 		drbd_pp_vacant += i;
333b411b363SPhilipp Reisner 		spin_unlock(&drbd_pp_lock);
334b411b363SPhilipp Reisner 	}
335435f0740SLars Ellenberg 	i = atomic_sub_return(i, a);
33645bb912bSLars Ellenberg 	if (i < 0)
337d0180171SAndreas Gruenbacher 		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
338435f0740SLars Ellenberg 			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
339b411b363SPhilipp Reisner 	wake_up(&drbd_pp_wait);
340b411b363SPhilipp Reisner }
341b411b363SPhilipp Reisner 
342b411b363SPhilipp Reisner /*
343b411b363SPhilipp Reisner You need to hold the req_lock:
344b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
345b411b363SPhilipp Reisner 
346b411b363SPhilipp Reisner You must not have the req_lock:
3473967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3480db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3497721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
350b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
351a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
352b411b363SPhilipp Reisner  drbd_clear_done_ee()
353b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
354b411b363SPhilipp Reisner */
355b411b363SPhilipp Reisner 
/* normal: payload_size == request size (bi_size)
 * w_same: payload_size == logical_block_size
 * trim: payload_size == 0 */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned int nr_pages = PFN_UP(payload_size);

	/* fault injection hook: simulate allocation failure */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	/* nr_pages may be zero (trim, see above): then no data pages */
	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, &drbd_ee_mempool);
	return NULL;
}
405b411b363SPhilipp Reisner 
/* Free @peer_req and its pages; @is_net selects which in-use counter
 * (pp_in_use_by_net vs pp_in_use) the pages are accounted against. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* The activity-log reference should have been put before we get
	 * here; complain via expect() and clean it up if not. */
	if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, &drbd_ee_mempool);
}
421b411b363SPhilipp Reisner 
drbd_free_peer_reqs(struct drbd_device * device,struct list_head * list)422b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
423b411b363SPhilipp Reisner {
424b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
425db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req, *t;
426b411b363SPhilipp Reisner 	int count = 0;
427b30ab791SAndreas Gruenbacher 	int is_net = list == &device->net_ee;
428b411b363SPhilipp Reisner 
4290500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
430b411b363SPhilipp Reisner 	list_splice_init(list, &work_list);
4310500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
432b411b363SPhilipp Reisner 
433a8cd15baSAndreas Gruenbacher 	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
434b30ab791SAndreas Gruenbacher 		__drbd_free_peer_req(device, peer_req, is_net);
435b411b363SPhilipp Reisner 		count++;
436b411b363SPhilipp Reisner 	}
437b411b363SPhilipp Reisner 	return count;
438b411b363SPhilipp Reisner }
439b411b363SPhilipp Reisner 
/*
 * Run the completion callback of every request on done_ee, and reclaim
 * finished net_ee entries along the way.  Returns the first callback
 * error (0 if none).
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* keep only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
475b411b363SPhilipp Reisner 
/* Wait until @head becomes empty.  Caller must hold resource->req_lock;
 * it is dropped while sleeping and re-acquired before re-checking, and
 * is held again on return. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
491b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes and releases the req_lock
 * itself; caller must not hold it. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
499b411b363SPhilipp Reisner 
/* Receive up to @size bytes from @sock into @buf.
 * With @flags == 0 we block until the full amount arrived
 * (MSG_WAITALL) and suppress SIGPIPE (MSG_NOSIGNAL). */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec vec = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = flags ? flags : (MSG_WAITALL | MSG_NOSIGNAL),
	};

	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, size);
	return sock_recvmsg(sock, &msg, msg.msg_flags);
}
512b411b363SPhilipp Reisner 
/* Receive @size bytes on the data socket.  Returns the number of bytes
 * received, 0 on peer shutdown, or a negative error.  On a short or
 * failed receive, forces the connection into C_BROKEN_PIPE. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* rv == 0: peer closed the socket.  If we sent the
		 * disconnect ourselves, give the state machine a chance
		 * to leave C_WF_REPORT_PARAMS before we log/react. */
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
545b411b363SPhilipp Reisner 
/* Receive exactly @size bytes: 0 on success, -EIO on a short read,
 * or the negative error from drbd_recv(). */
static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int ret = drbd_recv(connection, buf, size);

	if (ret == size)
		return 0;
	return ret >= 0 ? -EIO : ret;
}
558c6967746SAndreas Gruenbacher 
/* Like drbd_recv_all(), but additionally warn about a failed/short read,
 * unless we are being interrupted by a signal anyway. */
static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv = drbd_recv_all(connection, buf, size);

	if (rv && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);

	return rv;
}
568a5c31904SAndreas Gruenbacher 
5695dbf1673SLars Ellenberg /* quoting tcp(7):
5705dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5715dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5725dbf1673SLars Ellenberg  * This is our wrapper to do so.
5735dbf1673SLars Ellenberg  */
drbd_setbufsize(struct socket * sock,unsigned int snd,unsigned int rcv)5745dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5755dbf1673SLars Ellenberg 		unsigned int rcv)
5765dbf1673SLars Ellenberg {
5775dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5785dbf1673SLars Ellenberg 	if (snd) {
5795dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5805dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5815dbf1673SLars Ellenberg 	}
5825dbf1673SLars Ellenberg 	if (rcv) {
5835dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5845dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5855dbf1673SLars Ellenberg 	}
5865dbf1673SLars Ellenberg }
5875dbf1673SLars Ellenberg 
/* Actively try to establish an outgoing TCP connection to the peer.
 *
 * Returns the connected socket, or NULL on failure.  Transient failures
 * (timeout, refused, unreachable, signal) are expected while waiting for
 * the peer and do not change the connection state; any other error forces
 * the connection to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the tunables under RCU; net_conf may go away if the
	 * connection is being torn down concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* Use an ephemeral source port; only the source address matters. */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* Buffer sizes must be set before connect() to take effect,
	 * see drbd_setbufsize() / tcp(7). */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
675b411b363SPhilipp Reisner 
/* Context attached to the listen socket's sk_user_data so that its
 * sk_state_change callback can wake up the connect logic when an
 * incoming connection arrives. */
struct accept_wait_data {
	struct drbd_connection *connection;	/* connection being established */
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed on incoming connection */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored later */

};
6837a426fd8SPhilipp Reisner 
/* sk_state_change callback installed on the listen socket: ring the
 * door bell as soon as an incoming connection reaches ESTABLISHED,
 * then chain to the socket's original callback. */
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	/* read the saved callback before waking anyone up */
	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	/* always invoke the original callback as well */
	state_change(sk);
}
6947a426fd8SPhilipp Reisner 
/* Create, bind and listen() on the passive (accepting) socket described by
 * connection->my_addr, and hook its state-change callback up to @ad so an
 * incoming connection rings ad->door_bell (see drbd_incoming_connection()).
 *
 * Returns 0 on success, -EIO on failure.  Non-transient errors also force
 * the connection state to C_DISCONNECTING.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot buffer sizes under RCU; net_conf may vanish on teardown. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* must be done before listen(), see tcp(7) / drbd_setbufsize() */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Swap in our state-change callback under the callback lock BEFORE
	 * listen(), so no incoming connection can be missed. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7571f3e509bSPhilipp Reisner 
/* Undo the sk_state_change/sk_user_data hijacking done in
 * prepare_listen_socket(), restoring the socket's original callback. */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
765715306f6SAndreas Gruenbacher 
/* Wait (up to connect-int, with random jitter) for an incoming connection
 * on the listen socket prepared in @ad, then accept it.
 *
 * Returns the accepted socket, or NULL on timeout, pending signal, missing
 * net_conf, or accept failure.  Non-transient accept errors also force the
 * connection to C_DISCONNECTING.
 */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;

	/* drbd_incoming_connection() completes the door_bell when a
	 * connection reaches ESTABLISHED */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited our callback; restore the original */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
802b411b363SPhilipp Reisner 
803bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
804b411b363SPhilipp Reisner 
send_first_packet(struct drbd_connection * connection,struct drbd_socket * sock,enum drbd_packet cmd)805bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8069f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8079f5bdc33SAndreas Gruenbacher {
808bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8099f5bdc33SAndreas Gruenbacher 		return -EIO;
810bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
811b411b363SPhilipp Reisner }
812b411b363SPhilipp Reisner 
/* Read and decode the first packet on a freshly accepted socket.
 *
 * Returns the received packet command (e.g. P_INITIAL_DATA or
 * P_INITIAL_META) on success, or a negative error code on a missing
 * net_conf, short read, or header decode failure.
 */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	/* don't wait forever for the peer's first packet */
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;	/* short read */
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
840b411b363SPhilipp Reisner 
841b411b363SPhilipp Reisner /**
842b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
843b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
844b411b363SPhilipp Reisner  */
drbd_socket_okay(struct socket ** sock)8455d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
846b411b363SPhilipp Reisner {
847b411b363SPhilipp Reisner 	int rr;
848b411b363SPhilipp Reisner 	char tb[4];
849b411b363SPhilipp Reisner 
850b411b363SPhilipp Reisner 	if (!*sock)
85181e84650SAndreas Gruenbacher 		return false;
852b411b363SPhilipp Reisner 
853dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
854b411b363SPhilipp Reisner 
855b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85681e84650SAndreas Gruenbacher 		return true;
857b411b363SPhilipp Reisner 	} else {
858b411b363SPhilipp Reisner 		sock_release(*sock);
859b411b363SPhilipp Reisner 		*sock = NULL;
86081e84650SAndreas Gruenbacher 		return false;
861b411b363SPhilipp Reisner 	}
862b411b363SPhilipp Reisner }
8635d0b17f1SPhilipp Reisner 
connection_established(struct drbd_connection * connection,struct socket ** sock1,struct socket ** sock2)8645d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8655d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8665d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8675d0b17f1SPhilipp Reisner {
8685d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8695d0b17f1SPhilipp Reisner 	int timeout;
8705d0b17f1SPhilipp Reisner 	bool ok;
8715d0b17f1SPhilipp Reisner 
8725d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8735d0b17f1SPhilipp Reisner 		return false;
8745d0b17f1SPhilipp Reisner 
8755d0b17f1SPhilipp Reisner 	rcu_read_lock();
8765d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8775d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8785d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8795d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8805d0b17f1SPhilipp Reisner 
8815d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8825d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8835d0b17f1SPhilipp Reisner 
8845d0b17f1SPhilipp Reisner 	return ok;
8855d0b17f1SPhilipp Reisner }
8865d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
 * in a connection.
 *
 * Resets per-device sequence bookkeeping and sends the initial handshake
 * packets (sync params, sizes, uuids, current state) to the peer.
 * Returns 0 on success, or the error of the first failing send.
 */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Before protocol 100 there was only one connection-wide state
	 * mutex; newer peers get a per-device one. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* send the handshake; stop at the first failure */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
914b411b363SPhilipp Reisner 
915b411b363SPhilipp Reisner /*
916b411b363SPhilipp Reisner  * return values:
917b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
918b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
919b411b363SPhilipp Reisner  *  -1 peer talks different language,
920b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
921b411b363SPhilipp Reisner  *  -2 We do not have a network config...
922b411b363SPhilipp Reisner  */
conn_connect(struct drbd_connection * connection)923bde89a9eSAndreas Gruenbacher static int conn_connect(struct drbd_connection *connection)
924b411b363SPhilipp Reisner {
9257da35862SPhilipp Reisner 	struct drbd_socket sock, msock;
926c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
92744ed167dSPhilipp Reisner 	struct net_conf *nc;
9285d0b17f1SPhilipp Reisner 	int vnr, timeout, h;
9295d0b17f1SPhilipp Reisner 	bool discard_my_data, ok;
930197296ffSPhilipp Reisner 	enum drbd_state_rv rv;
9317a426fd8SPhilipp Reisner 	struct accept_wait_data ad = {
932bde89a9eSAndreas Gruenbacher 		.connection = connection,
9337a426fd8SPhilipp Reisner 		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
9347a426fd8SPhilipp Reisner 	};
935b411b363SPhilipp Reisner 
936bde89a9eSAndreas Gruenbacher 	clear_bit(DISCONNECT_SENT, &connection->flags);
937bde89a9eSAndreas Gruenbacher 	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
938b411b363SPhilipp Reisner 		return -2;
939b411b363SPhilipp Reisner 
9407da35862SPhilipp Reisner 	mutex_init(&sock.mutex);
941bde89a9eSAndreas Gruenbacher 	sock.sbuf = connection->data.sbuf;
942bde89a9eSAndreas Gruenbacher 	sock.rbuf = connection->data.rbuf;
9437da35862SPhilipp Reisner 	sock.socket = NULL;
9447da35862SPhilipp Reisner 	mutex_init(&msock.mutex);
945bde89a9eSAndreas Gruenbacher 	msock.sbuf = connection->meta.sbuf;
946bde89a9eSAndreas Gruenbacher 	msock.rbuf = connection->meta.rbuf;
9477da35862SPhilipp Reisner 	msock.socket = NULL;
9487da35862SPhilipp Reisner 
9490916e0e3SAndreas Gruenbacher 	/* Assume that the peer only understands protocol 80 until we know better.  */
950bde89a9eSAndreas Gruenbacher 	connection->agreed_pro_version = 80;
951b411b363SPhilipp Reisner 
952bde89a9eSAndreas Gruenbacher 	if (prepare_listen_socket(connection, &ad))
9537a426fd8SPhilipp Reisner 		return 0;
954b411b363SPhilipp Reisner 
955b411b363SPhilipp Reisner 	do {
9562bf89621SAndreas Gruenbacher 		struct socket *s;
957b411b363SPhilipp Reisner 
958bde89a9eSAndreas Gruenbacher 		s = drbd_try_connect(connection);
959b411b363SPhilipp Reisner 		if (s) {
9607da35862SPhilipp Reisner 			if (!sock.socket) {
9617da35862SPhilipp Reisner 				sock.socket = s;
962bde89a9eSAndreas Gruenbacher 				send_first_packet(connection, &sock, P_INITIAL_DATA);
9637da35862SPhilipp Reisner 			} else if (!msock.socket) {
964bde89a9eSAndreas Gruenbacher 				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
9657da35862SPhilipp Reisner 				msock.socket = s;
966bde89a9eSAndreas Gruenbacher 				send_first_packet(connection, &msock, P_INITIAL_META);
967b411b363SPhilipp Reisner 			} else {
9681ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Logic error in conn_connect()\n");
969b411b363SPhilipp Reisner 				goto out_release_sockets;
970b411b363SPhilipp Reisner 			}
971b411b363SPhilipp Reisner 		}
972b411b363SPhilipp Reisner 
9735d0b17f1SPhilipp Reisner 		if (connection_established(connection, &sock.socket, &msock.socket))
974b411b363SPhilipp Reisner 			break;
975b411b363SPhilipp Reisner 
976b411b363SPhilipp Reisner retry:
977bde89a9eSAndreas Gruenbacher 		s = drbd_wait_for_connect(connection, &ad);
978b411b363SPhilipp Reisner 		if (s) {
979bde89a9eSAndreas Gruenbacher 			int fp = receive_first_packet(connection, s);
9807da35862SPhilipp Reisner 			drbd_socket_okay(&sock.socket);
9817da35862SPhilipp Reisner 			drbd_socket_okay(&msock.socket);
98292f14951SPhilipp Reisner 			switch (fp) {
983e5d6f33aSAndreas Gruenbacher 			case P_INITIAL_DATA:
9847da35862SPhilipp Reisner 				if (sock.socket) {
9851ec861ebSAndreas Gruenbacher 					drbd_warn(connection, "initial packet S crossed\n");
9867da35862SPhilipp Reisner 					sock_release(sock.socket);
98780c6eed4SPhilipp Reisner 					sock.socket = s;
98880c6eed4SPhilipp Reisner 					goto randomize;
989b411b363SPhilipp Reisner 				}
9907da35862SPhilipp Reisner 				sock.socket = s;
991b411b363SPhilipp Reisner 				break;
992e5d6f33aSAndreas Gruenbacher 			case P_INITIAL_META:
993bde89a9eSAndreas Gruenbacher 				set_bit(RESOLVE_CONFLICTS, &connection->flags);
9947da35862SPhilipp Reisner 				if (msock.socket) {
9951ec861ebSAndreas Gruenbacher 					drbd_warn(connection, "initial packet M crossed\n");
9967da35862SPhilipp Reisner 					sock_release(msock.socket);
99780c6eed4SPhilipp Reisner 					msock.socket = s;
99880c6eed4SPhilipp Reisner 					goto randomize;
999b411b363SPhilipp Reisner 				}
10007da35862SPhilipp Reisner 				msock.socket = s;
1001b411b363SPhilipp Reisner 				break;
1002b411b363SPhilipp Reisner 			default:
10031ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Error receiving initial packet\n");
1004b411b363SPhilipp Reisner 				sock_release(s);
100580c6eed4SPhilipp Reisner randomize:
10068032bf12SJason A. Donenfeld 				if (get_random_u32_below(2))
1007b411b363SPhilipp Reisner 					goto retry;
1008b411b363SPhilipp Reisner 			}
1009b411b363SPhilipp Reisner 		}
1010b411b363SPhilipp Reisner 
1011bde89a9eSAndreas Gruenbacher 		if (connection->cstate <= C_DISCONNECTING)
1012b411b363SPhilipp Reisner 			goto out_release_sockets;
1013b411b363SPhilipp Reisner 		if (signal_pending(current)) {
1014b411b363SPhilipp Reisner 			flush_signals(current);
1015b411b363SPhilipp Reisner 			smp_rmb();
1016bde89a9eSAndreas Gruenbacher 			if (get_t_state(&connection->receiver) == EXITING)
1017b411b363SPhilipp Reisner 				goto out_release_sockets;
1018b411b363SPhilipp Reisner 		}
1019b411b363SPhilipp Reisner 
10205d0b17f1SPhilipp Reisner 		ok = connection_established(connection, &sock.socket, &msock.socket);
1021b666dbf8SPhilipp Reisner 	} while (!ok);
1022b411b363SPhilipp Reisner 
10237a426fd8SPhilipp Reisner 	if (ad.s_listen)
10247a426fd8SPhilipp Reisner 		sock_release(ad.s_listen);
1025b411b363SPhilipp Reisner 
102698683650SPhilipp Reisner 	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
102798683650SPhilipp Reisner 	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
1028b411b363SPhilipp Reisner 
10297da35862SPhilipp Reisner 	sock.socket->sk->sk_allocation = GFP_NOIO;
10307da35862SPhilipp Reisner 	msock.socket->sk->sk_allocation = GFP_NOIO;
1031b411b363SPhilipp Reisner 
103298123866SBenjamin Coddington 	sock.socket->sk->sk_use_task_frag = false;
103398123866SBenjamin Coddington 	msock.socket->sk->sk_use_task_frag = false;
103498123866SBenjamin Coddington 
10357da35862SPhilipp Reisner 	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
10367da35862SPhilipp Reisner 	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
1037b411b363SPhilipp Reisner 
1038b411b363SPhilipp Reisner 	/* NOT YET ...
1039bde89a9eSAndreas Gruenbacher 	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
10407da35862SPhilipp Reisner 	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
10416038178eSAndreas Gruenbacher 	 * first set it to the P_CONNECTION_FEATURES timeout,
1042b411b363SPhilipp Reisner 	 * which we set to 4x the configured ping_timeout. */
104344ed167dSPhilipp Reisner 	rcu_read_lock();
1044bde89a9eSAndreas Gruenbacher 	nc = rcu_dereference(connection->net_conf);
1045b411b363SPhilipp Reisner 
10467da35862SPhilipp Reisner 	sock.socket->sk->sk_sndtimeo =
10477da35862SPhilipp Reisner 	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
104844ed167dSPhilipp Reisner 
10497da35862SPhilipp Reisner 	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
105044ed167dSPhilipp Reisner 	timeout = nc->timeout * HZ / 10;
105108b165baSPhilipp Reisner 	discard_my_data = nc->discard_my_data;
105244ed167dSPhilipp Reisner 	rcu_read_unlock();
105344ed167dSPhilipp Reisner 
10547da35862SPhilipp Reisner 	msock.socket->sk->sk_sndtimeo = timeout;
1055b411b363SPhilipp Reisner 
1056b411b363SPhilipp Reisner 	/* we don't want delays.
105725985edcSLucas De Marchi 	 * we use TCP_CORK where appropriate, though */
105812abc5eeSChristoph Hellwig 	tcp_sock_set_nodelay(sock.socket->sk);
105912abc5eeSChristoph Hellwig 	tcp_sock_set_nodelay(msock.socket->sk);
1060b411b363SPhilipp Reisner 
1061bde89a9eSAndreas Gruenbacher 	connection->data.socket = sock.socket;
1062bde89a9eSAndreas Gruenbacher 	connection->meta.socket = msock.socket;
1063bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1064b411b363SPhilipp Reisner 
1065bde89a9eSAndreas Gruenbacher 	h = drbd_do_features(connection);
1066b411b363SPhilipp Reisner 	if (h <= 0)
1067b411b363SPhilipp Reisner 		return h;
1068b411b363SPhilipp Reisner 
1069bde89a9eSAndreas Gruenbacher 	if (connection->cram_hmac_tfm) {
1070b30ab791SAndreas Gruenbacher 		/* drbd_request_state(device, NS(conn, WFAuth)); */
1071bde89a9eSAndreas Gruenbacher 		switch (drbd_do_auth(connection)) {
1072b10d96cbSJohannes Thoma 		case -1:
10731ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Authentication of peer failed\n");
1074b411b363SPhilipp Reisner 			return -1;
1075b10d96cbSJohannes Thoma 		case 0:
10761ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Authentication of peer failed, trying again.\n");
1077b10d96cbSJohannes Thoma 			return 0;
1078b411b363SPhilipp Reisner 		}
1079b411b363SPhilipp Reisner 	}
1080b411b363SPhilipp Reisner 
1081bde89a9eSAndreas Gruenbacher 	connection->data.socket->sk->sk_sndtimeo = timeout;
1082bde89a9eSAndreas Gruenbacher 	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1083b411b363SPhilipp Reisner 
1084bde89a9eSAndreas Gruenbacher 	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
10857e2455c1SPhilipp Reisner 		return -1;
10861e86ac48SPhilipp Reisner 
108713c76abaSPhilipp Reisner 	/* Prevent a race between resync-handshake and
108813c76abaSPhilipp Reisner 	 * being promoted to Primary.
108913c76abaSPhilipp Reisner 	 *
109013c76abaSPhilipp Reisner 	 * Grab and release the state mutex, so we know that any current
109113c76abaSPhilipp Reisner 	 * drbd_set_role() is finished, and any incoming drbd_set_role
109213c76abaSPhilipp Reisner 	 * will see the STATE_SENT flag, and wait for it to be cleared.
109313c76abaSPhilipp Reisner 	 */
109431007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
109531007745SPhilipp Reisner 		mutex_lock(peer_device->device->state_mutex);
109631007745SPhilipp Reisner 
1097cde81d99SLars Ellenberg 	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
1098cde81d99SLars Ellenberg 	spin_lock_irq(&connection->resource->req_lock);
109931007745SPhilipp Reisner 	set_bit(STATE_SENT, &connection->flags);
1100cde81d99SLars Ellenberg 	spin_unlock_irq(&connection->resource->req_lock);
110131007745SPhilipp Reisner 
110231007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
110331007745SPhilipp Reisner 		mutex_unlock(peer_device->device->state_mutex);
110431007745SPhilipp Reisner 
110531007745SPhilipp Reisner 	rcu_read_lock();
110631007745SPhilipp Reisner 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
110731007745SPhilipp Reisner 		struct drbd_device *device = peer_device->device;
110831007745SPhilipp Reisner 		kref_get(&device->kref);
110931007745SPhilipp Reisner 		rcu_read_unlock();
111013c76abaSPhilipp Reisner 
111108b165baSPhilipp Reisner 		if (discard_my_data)
1112b30ab791SAndreas Gruenbacher 			set_bit(DISCARD_MY_DATA, &device->flags);
111308b165baSPhilipp Reisner 		else
1114b30ab791SAndreas Gruenbacher 			clear_bit(DISCARD_MY_DATA, &device->flags);
111508b165baSPhilipp Reisner 
111669a22773SAndreas Gruenbacher 		drbd_connected(peer_device);
111705a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
1118c141ebdaSPhilipp Reisner 		rcu_read_lock();
1119c141ebdaSPhilipp Reisner 	}
1120c141ebdaSPhilipp Reisner 	rcu_read_unlock();
1121c141ebdaSPhilipp Reisner 
1122bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1123bde89a9eSAndreas Gruenbacher 	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1124bde89a9eSAndreas Gruenbacher 		clear_bit(STATE_SENT, &connection->flags);
11251e86ac48SPhilipp Reisner 		return 0;
1126a1096a6eSPhilipp Reisner 	}
11271e86ac48SPhilipp Reisner 
11281c03e520SPhilipp Reisner 	drbd_thread_start(&connection->ack_receiver);
112939e91a60SLars Ellenberg 	/* opencoded create_singlethread_workqueue(),
113039e91a60SLars Ellenberg 	 * to be able to use format string arguments */
113139e91a60SLars Ellenberg 	connection->ack_sender =
113239e91a60SLars Ellenberg 		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
1133668700b4SPhilipp Reisner 	if (!connection->ack_sender) {
1134668700b4SPhilipp Reisner 		drbd_err(connection, "Failed to create workqueue ack_sender\n");
1135668700b4SPhilipp Reisner 		return 0;
1136668700b4SPhilipp Reisner 	}
1137b411b363SPhilipp Reisner 
11380500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
113908b165baSPhilipp Reisner 	/* The discard_my_data flag is a single-shot modifier to the next
114008b165baSPhilipp Reisner 	 * connection attempt, the handshake of which is now well underway.
114108b165baSPhilipp Reisner 	 * No need for rcu style copying of the whole struct
114208b165baSPhilipp Reisner 	 * just to clear a single value. */
1143bde89a9eSAndreas Gruenbacher 	connection->net_conf->discard_my_data = 0;
11440500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
114508b165baSPhilipp Reisner 
1146d3fcb490SPhilipp Reisner 	return h;
1147b411b363SPhilipp Reisner 
1148b411b363SPhilipp Reisner out_release_sockets:
11497a426fd8SPhilipp Reisner 	if (ad.s_listen)
11507a426fd8SPhilipp Reisner 		sock_release(ad.s_listen);
11517da35862SPhilipp Reisner 	if (sock.socket)
11527da35862SPhilipp Reisner 		sock_release(sock.socket);
11537da35862SPhilipp Reisner 	if (msock.socket)
11547da35862SPhilipp Reisner 		sock_release(msock.socket);
1155b411b363SPhilipp Reisner 	return -1;
1156b411b363SPhilipp Reisner }
1157b411b363SPhilipp Reisner 
decode_header(struct drbd_connection * connection,void * header,struct packet_info * pi)1158bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1159b411b363SPhilipp Reisner {
1160bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1161b411b363SPhilipp Reisner 
11620c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11630c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11640c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11650c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11661ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11670c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116802918be2SPhilipp Reisner 		}
11690c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11700c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11710c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11720c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1173e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1174e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1175e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1176b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1177eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1178e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1179e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1180e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1181e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1182e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
118377351055SPhilipp Reisner 		pi->vnr = 0;
118402918be2SPhilipp Reisner 	} else {
11851ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1186e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1187bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11888172f3e9SAndreas Gruenbacher 		return -EINVAL;
1189b411b363SPhilipp Reisner 	}
1190e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11918172f3e9SAndreas Gruenbacher 	return 0;
1192b411b363SPhilipp Reisner }
1193b411b363SPhilipp Reisner 
drbd_unplug_all_devices(struct drbd_connection * connection)1194c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1195c51a0ef3SLars Ellenberg {
1196c51a0ef3SLars Ellenberg 	if (current->plug == &connection->receiver_plug) {
1197c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
1198c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
1199c51a0ef3SLars Ellenberg 	} /* else: maybe just schedule() ?? */
1200c51a0ef3SLars Ellenberg }
1201c51a0ef3SLars Ellenberg 
drbd_recv_header(struct drbd_connection * connection,struct packet_info * pi)1202bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1203257d0af6SPhilipp Reisner {
1204bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120569bc7bc3SAndreas Gruenbacher 	int err;
1206257d0af6SPhilipp Reisner 
1207bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1208a5c31904SAndreas Gruenbacher 	if (err)
120969bc7bc3SAndreas Gruenbacher 		return err;
1210257d0af6SPhilipp Reisner 
1211bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1212bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1213b411b363SPhilipp Reisner 
121469bc7bc3SAndreas Gruenbacher 	return err;
1215b411b363SPhilipp Reisner }
1216b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but first try a non-blocking read.  If nothing
 * is immediately available, use the idle moment to unplug the backend
 * queues and tell peer TCP to ack quickly, before falling back to a
 * blocking read of the (remaining) header bytes. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	/* opportunistic non-blocking read of the next header */
	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* partial read: keep what we got, fetch only the rest below */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
struct issue_flush_context {
	atomic_t pending;	/* in-flight flushes; initialized to 1 by the
				 * submitter as a bias dropped after submission */
	int error;		/* errno of a failed flush, if any (last one wins) */
	struct completion done;	/* completed when pending drops to zero */
};
struct one_flush_context {
	struct drbd_device *device;	/* device this single flush targets */
	struct issue_flush_context *ctx;	/* shared context of the whole operation */
};
1260f9ff0da5SLars Ellenberg 
/* Completion handler for one per-device flush bio: record any error in the
 * shared context, drop the references taken at submission time, and wake
 * the waiter once the last pending flush has finished. */
static void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_status) {
		ctx->error = blk_status_to_errno(bio->bi_status);
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	/* drop the kref taken by the submitter (see drbd_flush()) */
	kref_put(&device->kref, drbd_destroy_device);

	/* must be last: ctx lives on the waiter's stack */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1281f9ff0da5SLars Ellenberg 
/* Submit one empty REQ_PREFLUSH bio to @device's backing device, accounted
 * in @ctx.  The caller holds an ldev reference and a kref on @device; both
 * are released by one_flush_endio() on completion, or right here if the
 * context allocation fails. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
				    REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);

	if (!octx) {
		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	/* account this flush before submitting; one_flush_endio() drops it */
	atomic_inc(&ctx->pending);
	submit_bio(bio);
}
1311f9ff0da5SLars Ellenberg 
/* Flush the backing devices of all attached volumes of this connection in
 * parallel (if the configured write ordering calls for flushes) and wait
 * for all completions.  On any flush error, degrade the resource's write
 * ordering to WO_DRAIN_IO so we do not keep issuing flushes that fail. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* start pending at 1 so completions cannot finish the
		 * context while we are still submitting */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* submit_one_flush() may sleep (GFP_NOIO allocations),
			 * so drop the RCU read lock around the call; the ldev
			 * and kref references keep the device alive */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1352b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * Returns FE_STILL_LIVE if the epoch remains in use, FE_DESTROYED if it
 * (or a successor) was unlinked and freed, or FE_RECYCLED if the current
 * epoch was reset for reuse.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is an or-ed-in modifier */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* an epoch may be finished once it saw at least one request,
		 * has no active requests left, and its barrier number has
		 * arrived (or we are cleaning up anyways) */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock to send the barrier ack */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* not the newest epoch: unlink and free it,
				 * then let the loop re-evaluate its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is recycled, not freed */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1428b411b363SPhilipp Reisner 
14298fe39aacSPhilipp Reisner static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev * bdev,enum write_ordering_e wo)14308fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14318fe39aacSPhilipp Reisner {
14328fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14338fe39aacSPhilipp Reisner 
14348fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14358fe39aacSPhilipp Reisner 
1436f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1437f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1438f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1439f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14408fe39aacSPhilipp Reisner 
14418fe39aacSPhilipp Reisner 	return wo;
14428fe39aacSPhilipp Reisner }
14438fe39aacSPhilipp Reisner 
/*
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource whose effective write ordering is updated.
 * @bdev:	additional backing device to take into account (may be NULL);
 *		if it is one of the attached devices it is only considered once.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* asking for WO_BDEV_FLUSH may upgrade again; everything else can
	 * only degrade the currently effective method */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev is already attached: do not consider it twice */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1482b411b363SPhilipp Reisner 
1483f31e583aSLars Ellenberg /*
1484f31e583aSLars Ellenberg  * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1485f31e583aSLars Ellenberg  * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1486f31e583aSLars Ellenberg  * will directly go to fallback mode, submitting normal writes, and
1487f31e583aSLars Ellenberg  * never even try to UNMAP.
1488f31e583aSLars Ellenberg  *
1489f31e583aSLars Ellenberg  * And dm-thin does not do this (yet), mostly because in general it has
1490f31e583aSLars Ellenberg  * to assume that "skip_block_zeroing" is set.  See also:
1491f31e583aSLars Ellenberg  * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1492f31e583aSLars Ellenberg  * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1493f31e583aSLars Ellenberg  *
1494f31e583aSLars Ellenberg  * We *may* ignore the discard-zeroes-data setting, if so configured.
1495f31e583aSLars Ellenberg  *
1496f31e583aSLars Ellenberg  * Assumption is that this "discard_zeroes_data=0" is only because the backend
1497f31e583aSLars Ellenberg  * may ignore partial unaligned discards.
1498f31e583aSLars Ellenberg  *
1499f31e583aSLars Ellenberg  * LVM/DM thin as of at least
1500f31e583aSLars Ellenberg  *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
1501f31e583aSLars Ellenberg  *   Library version: 1.02.93-RHEL7 (2015-01-28)
1502f31e583aSLars Ellenberg  *   Driver version:  4.29.0
1503f31e583aSLars Ellenberg  * still behaves this way.
1504f31e583aSLars Ellenberg  *
1505f31e583aSLars Ellenberg  * For unaligned (wrt. alignment and granularity) or too small discards,
1506f31e583aSLars Ellenberg  * we zero-out the initial (and/or) trailing unaligned partial chunks,
1507f31e583aSLars Ellenberg  * but discard all the aligned full chunks.
1508f31e583aSLars Ellenberg  *
1509f31e583aSLars Ellenberg  * At least for LVM/DM thin, with skip_block_zeroing=false,
1510f31e583aSLars Ellenberg  * the result is effectively "discard_zeroes_data=1".
1511f31e583aSLars Ellenberg  */
1512f31e583aSLars Ellenberg /* flags: EE_TRIM|EE_ZEROOUT */
/* Discard (and/or zero-out) @nr_sectors starting at @start on @device's
 * backing device; see the large comment above for the rationale.
 * Returns 0 on success, 1 if any of the issued requests failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* explicit zero-out requested, or discarding not requested at all */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap one discard request at 1 << 22 sectors (2 GiB), rounded down
	 * to a multiple of the discard granularity */
	max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		/* zero out the unaligned head up to the first aligned chunk */
		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* discard full max-sized chunks */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
					    GFP_NOIO);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			/* discard the remaining granularity-aligned tail */
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* zero out whatever could not be discarded */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1577f31e583aSLars Ellenberg 
can_do_reliable_discards(struct drbd_device * device)1578f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1579f31e583aSLars Ellenberg {
1580f31e583aSLars Ellenberg 	struct disk_conf *dc;
1581f31e583aSLars Ellenberg 	bool can_do;
1582f31e583aSLars Ellenberg 
158370200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
1584f31e583aSLars Ellenberg 		return false;
1585f31e583aSLars Ellenberg 
1586f31e583aSLars Ellenberg 	rcu_read_lock();
1587f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1588f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1589f31e583aSLars Ellenberg 	rcu_read_unlock();
1590f31e583aSLars Ellenberg 	return can_do;
1591f31e583aSLars Ellenberg }
1592f31e583aSLars Ellenberg 
/* Carry out a peer-requested discard on the local backing device, falling
 * back to zero-out when discards are not reliably zeroing, then complete
 * the peer request.  Failures are flagged via EE_WAS_ERROR. */
static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	/* If the backend cannot discard, or does not guarantee
	 * read-back zeroes in discarded ranges, we fall back to
	 * zero-out.  Unless configuration specifically requested
	 * otherwise. */
	if (!can_do_reliable_discards(device))
		peer_req->flags |= EE_ZEROOUT;

	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}
1607dd4f699dSLars Ellenberg 
peer_request_fault_type(struct drbd_peer_request * peer_req)1608ce668b6dSChristoph Böhmwalder static int peer_request_fault_type(struct drbd_peer_request *peer_req)
1609ce668b6dSChristoph Böhmwalder {
1610ce668b6dSChristoph Böhmwalder 	if (peer_req_op(peer_req) == REQ_OP_READ) {
1611ce668b6dSChristoph Böhmwalder 		return peer_req->flags & EE_APPLICATION ?
1612ce668b6dSChristoph Böhmwalder 			DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
1613ce668b6dSChristoph Böhmwalder 	} else {
1614ce668b6dSChristoph Böhmwalder 		return peer_req->flags & EE_APPLICATION ?
1615ce668b6dSChristoph Böhmwalder 			DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
1616ce668b6dSChristoph Böhmwalder 	}
1617ce668b6dSChristoph Böhmwalder }
1618ce668b6dSChristoph Böhmwalder 
1619a34592ffSChristoph Hellwig /**
1620fbe29decSAndreas Gruenbacher  * drbd_submit_peer_request()
1621db830c46SAndreas Gruenbacher  * @peer_req:	peer request
162210f6d992SLars Ellenberg  *
162310f6d992SLars Ellenberg  * May spread the pages to multiple bios,
162410f6d992SLars Ellenberg  * depending on bio_add_page restrictions.
162510f6d992SLars Ellenberg  *
162610f6d992SLars Ellenberg  * Returns 0 if all bios have been submitted,
162710f6d992SLars Ellenberg  * -ENOMEM if we could not allocate enough bios,
162810f6d992SLars Ellenberg  * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
162910f6d992SLars Ellenberg  *  single page to an empty bio (which should never happen and likely indicates
163010f6d992SLars Ellenberg  *  that the lower level IO stack is in some way broken). This has been observed
163110f6d992SLars Ellenberg  *  on certain Xen deployments.
163245bb912bSLars Ellenberg  */
163345bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
{
	struct drbd_device *device = peer_req->peer_device->device;
	struct bio *bios = NULL;	/* singly linked chain (via bi_next) of bios built so far */
	struct bio *bio;
	struct page *page = peer_req->pages;	/* page chain holding the payload */
	sector_t sector = peer_req->i.sector;
	unsigned int data_size = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned int n_bios = 0;
	unsigned int nr_pages = PFN_UP(data_size);

	/* TRIM/DISCARD: for now, always use the helper function
	 * blkdev_issue_zeroout(..., discard=true).
	 * It's synchronous, but it does the right thing wrt. bio splitting.
	 * Correctness first, performance later.  Next step is to code an
	 * asynchronous variant of the same.
	 */
	if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request from receive_rs_deallocated(),
		 * it is already on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		/* synchronous discard/zero-out helper; no bios of our own needed */
		drbd_issue_peer_discard_or_zero_out(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	/* _DISCARD, _WRITE_ZEROES handled above.
	 * REQ_OP_FLUSH (empty flush) not expected,
	 * should have been mapped to a "drbd protocol barrier".
	 * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
	 */
	if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
				peer_req_op(peer_req) == REQ_OP_READ)) {
		drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
		return -EINVAL;
	}

	/* nr_pages is an upper bound here; bio_alloc(GFP_NOIO) does not fail */
	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submitted in reverse build order below */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	/* Map the remaining pages into the current bio.  If bio_add_page()
	 * refuses a page (bio full wrt. lower-level limits), jump back to
	 * open a fresh bio and continue from the current page/sector. */
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0))
			goto next_bio;
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	/* all payload bytes and all pages must have been consumed */
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		/* pop the chain head and submit it; completion is accounted
		 * per-bio via pending_bios in drbd_peer_request_endio */
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
	} while (bios);
	return 0;
}
172645bb912bSLars Ellenberg 
drbd_remove_epoch_entry_interval(struct drbd_device * device,struct drbd_peer_request * peer_req)1727b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1728db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
172953840641SAndreas Gruenbacher {
1730db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
173153840641SAndreas Gruenbacher 
1732b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
173353840641SAndreas Gruenbacher 	drbd_clear_interval(i);
173453840641SAndreas Gruenbacher 
17356c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
173653840641SAndreas Gruenbacher 	if (i->waiting)
1737b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
173853840641SAndreas Gruenbacher }
173953840641SAndreas Gruenbacher 
/* Wait until the active_ee list of every volume of this connection is empty.
 * Must be called from a context that may sleep (drbd_wait_ee_list_empty
 * blocks); the RCU read lock is dropped around the wait and the device is
 * pinned with a kref so it cannot go away meanwhile. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* hold a reference across the sleeping wait below */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		/* re-acquire for the next idr iteration step */
		rcu_read_lock();
	}
	rcu_read_unlock();
}
175777fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: close the current write epoch and,
 * depending on the configured write ordering, either recycle it or install a
 * freshly allocated epoch as the new current one.  Returns 0 on success,
 * -EIO on an invalid write_ordering setting. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		/* epoch was already finished and recycled; nothing to do */
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* allocation failed: degrade to drain/flush behavior below */
		fallthrough;

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* drain all in-flight writes of this epoch, then flush disks */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* only need a new epoch object if the current one still has
		 * requests accounted to it after the drain */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* initialize the new epoch before publishing it */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1825b411b363SPhilipp Reisner 
18269104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
drbd_csum_ee_size(struct crypto_shash * h,struct drbd_peer_request * r,void * d,unsigned int payload_size)18273d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18289104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18299104d31aSLars Ellenberg 			      unsigned int payload_size)
18309104d31aSLars Ellenberg {
18319104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18329104d31aSLars Ellenberg 	r->i.size = payload_size;
18339104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18349104d31aSLars Ellenberg 	r->i.size = tmp;
18359104d31aSLars Ellenberg }
18369104d31aSLars Ellenberg 
1837b411b363SPhilipp Reisner /* used from receive_RSDataReply (recv_resync_read)
18389104d31aSLars Ellenberg  * and from receive_Data.
18399104d31aSLars Ellenberg  * data_size: actual payload ("data in")
18409104d31aSLars Ellenberg  * 	for normal writes that is bi_size.
18419104d31aSLars Ellenberg  * 	for discards, that is zero.
18429104d31aSLars Ellenberg  * 	for write same, it is logical_block_size.
18439104d31aSLars Ellenberg  * both trim and write same have the bi_size ("data len to be affected")
18449104d31aSLars Ellenberg  * as extra argument in the packet header.
18459104d31aSLars Ellenberg  */
1846f6ffca9fSAndreas Gruenbacher static struct drbd_peer_request *
read_in_block(struct drbd_peer_device * peer_device,u64 id,sector_t sector,struct packet_info * pi)184769a22773SAndreas Gruenbacher read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1848a0fb3c47SLars Ellenberg 	      struct packet_info *pi) __must_hold(local)
1849b411b363SPhilipp Reisner {
185069a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1851155bd9d1SChristoph Hellwig 	const sector_t capacity = get_capacity(device->vdisk);
1852db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
1853b411b363SPhilipp Reisner 	struct page *page;
185411f8b2b6SAndreas Gruenbacher 	int digest_size, err;
185511f8b2b6SAndreas Gruenbacher 	unsigned int data_size = pi->size, ds;
185669a22773SAndreas Gruenbacher 	void *dig_in = peer_device->connection->int_dig_in;
185769a22773SAndreas Gruenbacher 	void *dig_vv = peer_device->connection->int_dig_vv;
18586b4388acSPhilipp Reisner 	unsigned long *data;
1859a0fb3c47SLars Ellenberg 	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
1860f31e583aSLars Ellenberg 	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
1861b411b363SPhilipp Reisner 
186211f8b2b6SAndreas Gruenbacher 	digest_size = 0;
1863a0fb3c47SLars Ellenberg 	if (!trim && peer_device->connection->peer_integrity_tfm) {
18643d0e6375SKees Cook 		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
18659f5bdc33SAndreas Gruenbacher 		/*
18669f5bdc33SAndreas Gruenbacher 		 * FIXME: Receive the incoming digest into the receive buffer
18679f5bdc33SAndreas Gruenbacher 		 *	  here, together with its struct p_data?
18689f5bdc33SAndreas Gruenbacher 		 */
186911f8b2b6SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
1870a5c31904SAndreas Gruenbacher 		if (err)
1871b411b363SPhilipp Reisner 			return NULL;
187211f8b2b6SAndreas Gruenbacher 		data_size -= digest_size;
187388104ca4SAndreas Gruenbacher 	}
1874b411b363SPhilipp Reisner 
1875a34592ffSChristoph Hellwig 	/* assume request_size == data_size, but special case trim. */
18769104d31aSLars Ellenberg 	ds = data_size;
1877a0fb3c47SLars Ellenberg 	if (trim) {
1878677b3672SChristoph Böhmwalder 		if (!expect(peer_device, data_size == 0))
18799104d31aSLars Ellenberg 			return NULL;
18809104d31aSLars Ellenberg 		ds = be32_to_cpu(trim->size);
1881f31e583aSLars Ellenberg 	} else if (zeroes) {
1882677b3672SChristoph Böhmwalder 		if (!expect(peer_device, data_size == 0))
1883f31e583aSLars Ellenberg 			return NULL;
1884f31e583aSLars Ellenberg 		ds = be32_to_cpu(zeroes->size);
1885a0fb3c47SLars Ellenberg 	}
1886a0fb3c47SLars Ellenberg 
1887677b3672SChristoph Böhmwalder 	if (!expect(peer_device, IS_ALIGNED(ds, 512)))
1888841ce241SAndreas Gruenbacher 		return NULL;
1889a34592ffSChristoph Hellwig 	if (trim || zeroes) {
1890677b3672SChristoph Böhmwalder 		if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
18919104d31aSLars Ellenberg 			return NULL;
1892677b3672SChristoph Böhmwalder 	} else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE))
1893841ce241SAndreas Gruenbacher 		return NULL;
1894b411b363SPhilipp Reisner 
18956666032aSLars Ellenberg 	/* even though we trust out peer,
18966666032aSLars Ellenberg 	 * we sometimes have to double check. */
18979104d31aSLars Ellenberg 	if (sector + (ds>>9) > capacity) {
1898d0180171SAndreas Gruenbacher 		drbd_err(device, "request from peer beyond end of local disk: "
1899fdda6544SLars Ellenberg 			"capacity: %llus < sector: %llus + size: %u\n",
19006666032aSLars Ellenberg 			(unsigned long long)capacity,
19019104d31aSLars Ellenberg 			(unsigned long long)sector, ds);
19026666032aSLars Ellenberg 		return NULL;
19036666032aSLars Ellenberg 	}
19046666032aSLars Ellenberg 
1905b411b363SPhilipp Reisner 	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1906b411b363SPhilipp Reisner 	 * "criss-cross" setup, that might cause write-out on some other DRBD,
1907b411b363SPhilipp Reisner 	 * which in turn might block on the other node at this very place.  */
19089104d31aSLars Ellenberg 	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
1909db830c46SAndreas Gruenbacher 	if (!peer_req)
1910b411b363SPhilipp Reisner 		return NULL;
191145bb912bSLars Ellenberg 
191221ae5d7fSLars Ellenberg 	peer_req->flags |= EE_WRITE;
19139104d31aSLars Ellenberg 	if (trim) {
1914f31e583aSLars Ellenberg 		peer_req->flags |= EE_TRIM;
1915f31e583aSLars Ellenberg 		return peer_req;
1916f31e583aSLars Ellenberg 	}
1917f31e583aSLars Ellenberg 	if (zeroes) {
1918f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
191981a3537aSLars Ellenberg 		return peer_req;
19209104d31aSLars Ellenberg 	}
1921a73ff323SLars Ellenberg 
19229104d31aSLars Ellenberg 	/* receive payload size bytes into page chain */
1923b411b363SPhilipp Reisner 	ds = data_size;
1924db830c46SAndreas Gruenbacher 	page = peer_req->pages;
192545bb912bSLars Ellenberg 	page_chain_for_each(page) {
192645bb912bSLars Ellenberg 		unsigned len = min_t(int, ds, PAGE_SIZE);
19276b4388acSPhilipp Reisner 		data = kmap(page);
192869a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1929b30ab791SAndreas Gruenbacher 		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
1930d0180171SAndreas Gruenbacher 			drbd_err(device, "Fault injection: Corrupting data on receive\n");
19316b4388acSPhilipp Reisner 			data[0] = data[0] ^ (unsigned long)-1;
19326b4388acSPhilipp Reisner 		}
1933b411b363SPhilipp Reisner 		kunmap(page);
1934a5c31904SAndreas Gruenbacher 		if (err) {
1935b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1936b411b363SPhilipp Reisner 			return NULL;
1937b411b363SPhilipp Reisner 		}
1938a5c31904SAndreas Gruenbacher 		ds -= len;
1939b411b363SPhilipp Reisner 	}
1940b411b363SPhilipp Reisner 
194111f8b2b6SAndreas Gruenbacher 	if (digest_size) {
19429104d31aSLars Ellenberg 		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
194311f8b2b6SAndreas Gruenbacher 		if (memcmp(dig_in, dig_vv, digest_size)) {
1944d0180171SAndreas Gruenbacher 			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
1945470be44aSLars Ellenberg 				(unsigned long long)sector, data_size);
1946b30ab791SAndreas Gruenbacher 			drbd_free_peer_req(device, peer_req);
1947b411b363SPhilipp Reisner 			return NULL;
1948b411b363SPhilipp Reisner 		}
1949b411b363SPhilipp Reisner 	}
1950b30ab791SAndreas Gruenbacher 	device->recv_cnt += data_size >> 9;
1951db830c46SAndreas Gruenbacher 	return peer_req;
1952b411b363SPhilipp Reisner }
1953b411b363SPhilipp Reisner 
1954b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1955b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1956b411b363SPhilipp Reisner  */
drbd_drain_block(struct drbd_peer_device * peer_device,int data_size)195769a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1958b411b363SPhilipp Reisner {
1959b411b363SPhilipp Reisner 	struct page *page;
1960a5c31904SAndreas Gruenbacher 	int err = 0;
1961b411b363SPhilipp Reisner 	void *data;
1962b411b363SPhilipp Reisner 
1963c3470cdeSLars Ellenberg 	if (!data_size)
1964fc5be839SAndreas Gruenbacher 		return 0;
1965c3470cdeSLars Ellenberg 
196669a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1967b411b363SPhilipp Reisner 
1968b411b363SPhilipp Reisner 	data = kmap(page);
1969b411b363SPhilipp Reisner 	while (data_size) {
1970fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1971fc5be839SAndreas Gruenbacher 
197269a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1973a5c31904SAndreas Gruenbacher 		if (err)
1974b411b363SPhilipp Reisner 			break;
1975a5c31904SAndreas Gruenbacher 		data_size -= len;
1976b411b363SPhilipp Reisner 	}
1977b411b363SPhilipp Reisner 	kunmap(page);
197869a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
1979fc5be839SAndreas Gruenbacher 	return err;
1980b411b363SPhilipp Reisner }
1981b411b363SPhilipp Reisner 
/* Receive a disk-less read reply directly into the bio of the original
 * request (no intermediate peer request), verifying the optional integrity
 * digest.  Returns 0 on success, a negative error otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/* the digest precedes the payload on the wire */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* copy the payload segment-by-segment into the master bio's pages */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = bvec_kmap_local(&bvec);
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap_local(mapped);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* the bio must have had room for exactly data_size bytes */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
2029b411b363SPhilipp Reisner 
2030a990be46SAndreas Gruenbacher /*
2031668700b4SPhilipp Reisner  * e_end_resync_block() is called in ack_sender context via
2032a990be46SAndreas Gruenbacher  * drbd_finish_peer_reqs().
2033a990be46SAndreas Gruenbacher  */
e_end_resync_block(struct drbd_work * w,int unused)203499920dc5SAndreas Gruenbacher static int e_end_resync_block(struct drbd_work *w, int unused)
2035b411b363SPhilipp Reisner {
20368050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2037a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2038a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
2039a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2040db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
204199920dc5SAndreas Gruenbacher 	int err;
2042b411b363SPhilipp Reisner 
20430b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2044b411b363SPhilipp Reisner 
2045db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
20460d11f3cfSChristoph Böhmwalder 		drbd_set_in_sync(peer_device, sector, peer_req->i.size);
2047a8cd15baSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
2048b411b363SPhilipp Reisner 	} else {
2049b411b363SPhilipp Reisner 		/* Record failure to sync */
20500d11f3cfSChristoph Böhmwalder 		drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
2051b411b363SPhilipp Reisner 
2052a8cd15baSAndreas Gruenbacher 		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2053b411b363SPhilipp Reisner 	}
2054b30ab791SAndreas Gruenbacher 	dec_unacked(device);
2055b411b363SPhilipp Reisner 
205699920dc5SAndreas Gruenbacher 	return err;
2057b411b363SPhilipp Reisner }
2058b411b363SPhilipp Reisner 
recv_resync_read(struct drbd_peer_device * peer_device,sector_t sector,struct packet_info * pi)205969a22773SAndreas Gruenbacher static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
2060a0fb3c47SLars Ellenberg 			    struct packet_info *pi) __releases(local)
2061b411b363SPhilipp Reisner {
206269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
2063db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2064b411b363SPhilipp Reisner 
2065a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
2066db830c46SAndreas Gruenbacher 	if (!peer_req)
206745bb912bSLars Ellenberg 		goto fail;
2068b411b363SPhilipp Reisner 
20690d11f3cfSChristoph Böhmwalder 	dec_rs_pending(peer_device);
2070b411b363SPhilipp Reisner 
2071b30ab791SAndreas Gruenbacher 	inc_unacked(device);
2072b411b363SPhilipp Reisner 	/* corresponding dec_unacked() in e_end_resync_block()
2073b411b363SPhilipp Reisner 	 * respective _drbd_clear_done_ee */
2074b411b363SPhilipp Reisner 
2075a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_resync_block;
2076ce668b6dSChristoph Böhmwalder 	peer_req->opf = REQ_OP_WRITE;
207721ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
207845bb912bSLars Ellenberg 
20790500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2080b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->sync_ee);
20810500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2082b411b363SPhilipp Reisner 
2083a0fb3c47SLars Ellenberg 	atomic_add(pi->size >> 9, &device->rs_sect_ev);
2084ce668b6dSChristoph Böhmwalder 	if (drbd_submit_peer_request(peer_req) == 0)
2085e1c1b0fcSAndreas Gruenbacher 		return 0;
208645bb912bSLars Ellenberg 
208710f6d992SLars Ellenberg 	/* don't care for the reason here */
2088d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
20890500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2090a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
20910500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
209222cc37a9SLars Ellenberg 
2093b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
209445bb912bSLars Ellenberg fail:
2095b30ab791SAndreas Gruenbacher 	put_ldev(device);
2096e1c1b0fcSAndreas Gruenbacher 	return -EIO;
2097b411b363SPhilipp Reisner }
2098b411b363SPhilipp Reisner 
2099668eebc6SAndreas Gruenbacher static struct drbd_request *
/* Look up the drbd_request a peer's block_id refers to.  The id is the
 * pointer value we originally sent, so it is cast back and then validated
 * against @root via drbd_contains_interval() before being trusted --
 * a bogus id from the peer must never be dereferenced as a request. */
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
2115668eebc6SAndreas Gruenbacher 
receive_DataReply(struct drbd_connection * connection,struct packet_info * pi)2116bde89a9eSAndreas Gruenbacher static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
2117b411b363SPhilipp Reisner {
21189f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2119b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2120b411b363SPhilipp Reisner 	struct drbd_request *req;
2121b411b363SPhilipp Reisner 	sector_t sector;
212282bc0194SAndreas Gruenbacher 	int err;
2123e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21244a76b161SAndreas Gruenbacher 
21259f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21269f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21274a76b161SAndreas Gruenbacher 		return -EIO;
21289f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2129b411b363SPhilipp Reisner 
2130b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2131b411b363SPhilipp Reisner 
21320500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2133b30ab791SAndreas Gruenbacher 	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
21340500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2135c3afd8f5SAndreas Gruenbacher 	if (unlikely(!req))
213682bc0194SAndreas Gruenbacher 		return -EIO;
2137b411b363SPhilipp Reisner 
213869a22773SAndreas Gruenbacher 	err = recv_dless_read(peer_device, req, sector, pi->size);
213982bc0194SAndreas Gruenbacher 	if (!err)
2140ad878a0dSChristoph Böhmwalder 		req_mod(req, DATA_RECEIVED, peer_device);
2141b411b363SPhilipp Reisner 	/* else: nothing. handled from drbd_disconnect...
2142b411b363SPhilipp Reisner 	 * I don't think we may complete this just yet
2143b411b363SPhilipp Reisner 	 * in case we are "on-disconnect: freeze" */
2144b411b363SPhilipp Reisner 
214582bc0194SAndreas Gruenbacher 	return err;
2146b411b363SPhilipp Reisner }
2147b411b363SPhilipp Reisner 
receive_RSDataReply(struct drbd_connection * connection,struct packet_info * pi)2148bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
2149b411b363SPhilipp Reisner {
21509f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2151b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
2152b411b363SPhilipp Reisner 	sector_t sector;
215382bc0194SAndreas Gruenbacher 	int err;
2154e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
21554a76b161SAndreas Gruenbacher 
21569f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
21579f4fe9adSAndreas Gruenbacher 	if (!peer_device)
21584a76b161SAndreas Gruenbacher 		return -EIO;
21599f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2160b411b363SPhilipp Reisner 
2161b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
21620b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, p->block_id == ID_SYNCER);
2163b411b363SPhilipp Reisner 
2164b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
2165b411b363SPhilipp Reisner 		/* data is submitted to disk within recv_resync_read.
2166b411b363SPhilipp Reisner 		 * corresponding put_ldev done below on error,
2167fcefa62eSAndreas Gruenbacher 		 * or in drbd_peer_request_endio. */
2168a0fb3c47SLars Ellenberg 		err = recv_resync_read(peer_device, sector, pi);
2169b411b363SPhilipp Reisner 	} else {
2170e3fa02d7SChristoph Böhmwalder 		if (drbd_ratelimit())
2171d0180171SAndreas Gruenbacher 			drbd_err(device, "Can not write resync data to local disk.\n");
2172b411b363SPhilipp Reisner 
217369a22773SAndreas Gruenbacher 		err = drbd_drain_block(peer_device, pi->size);
2174b411b363SPhilipp Reisner 
217569a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2176b411b363SPhilipp Reisner 	}
2177b411b363SPhilipp Reisner 
2178b30ab791SAndreas Gruenbacher 	atomic_add(pi->size >> 9, &device->rs_sect_in);
2179778f271dSPhilipp Reisner 
218082bc0194SAndreas Gruenbacher 	return err;
2181b411b363SPhilipp Reisner }
2182b411b363SPhilipp Reisner 
/*
 * Re-queue postponed local write requests that overlap the given
 * sector range, now that the conflicting peer write has completed.
 *
 * Caller must hold device->resource->req_lock (see e_end_block()).
 */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* only local application requests, not peer requests */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* skip requests still pending on the local disk, and
		 * requests that were not postponed by conflict handling */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
	}
}
22017be8da07SAndreas Gruenbacher 
/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 *
 * Completion work for a peer write request: send the owed (n)ack if
 * EE_SEND_WRITE_ACK is set, remove the request from the write-conflict
 * interval tree, and drop our reference on its epoch.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While a resync is running, a successful write that
			 * may set blocks in sync is acked with P_RS_WRITE_ACK
			 * and also marked in sync locally below. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(peer_device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2247b411b363SPhilipp Reisner 
e_send_ack(struct drbd_work * w,enum drbd_packet ack)2248a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2249b411b363SPhilipp Reisner {
22508050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2251a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2252a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
225399920dc5SAndreas Gruenbacher 	int err;
2254b411b363SPhilipp Reisner 
2255a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2256a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2257b411b363SPhilipp Reisner 
225899920dc5SAndreas Gruenbacher 	return err;
2259b411b363SPhilipp Reisner }
2260b411b363SPhilipp Reisner 
/* Answer a conflicting peer write with P_SUPERSEDED: the peer's request
 * was fully overlapped by another write and is considered overwritten
 * (see handle_write_conflicts()). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2265b6a370baSPhilipp Reisner 
e_send_retry_write(struct drbd_work * w,int unused)226699920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22677be8da07SAndreas Gruenbacher {
2268a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2269a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2270a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
22717be8da07SAndreas Gruenbacher 
2272a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2273d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
22747be8da07SAndreas Gruenbacher }
22757be8da07SAndreas Gruenbacher 
seq_greater(u32 a,u32 b)22763e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
22773e394da1SAndreas Gruenbacher {
22783e394da1SAndreas Gruenbacher 	/*
22793e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
22803e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
22813e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
22823e394da1SAndreas Gruenbacher 	 */
22833e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
22843e394da1SAndreas Gruenbacher }
22853e394da1SAndreas Gruenbacher 
/* Return whichever of the two sequence numbers is newer, honoring
 * 32-bit wrap-around via seq_greater(). */
static u32 seq_max(u32 a, u32 b)
{
	if (seq_greater(a, b))
		return a;
	return b;
}
22903e394da1SAndreas Gruenbacher 
/*
 * Record the newest sequence number seen from the peer and wake waiters
 * on device->seq_wait (see wait_for_and_update_peer_seq()).  Only done
 * when this node is responsible for resolving write conflicts.
 */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
23063e394da1SAndreas Gruenbacher 
/* Do the two ranges (start sector, length in bytes) intersect?
 * They overlap iff each one starts before the other one ends. */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	sector_t e1 = s1 + (l1 >> 9);	/* first end, in sectors */
	sector_t e2 = s2 + (l2 >> 9);	/* second end, in sectors */

	return s1 < e2 && s2 < e1;
}
2311d93f6302SLars Ellenberg 
/* maybe change sync_ee into interval trees as well? */
/* Return true if @peer_req overlaps any resync write currently on the
 * sync_ee list.  Linear scan under req_lock; see the todo above. */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}
2330b6a370baSPhilipp Reisner 
2331b411b363SPhilipp Reisner /* Called from receive_Data.
2332b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2333b411b363SPhilipp Reisner  *
2334b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2335b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2336b411b363SPhilipp Reisner  * been sent.
2337b411b363SPhilipp Reisner  *
2338b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2339b411b363SPhilipp Reisner  *
2340b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2341b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2342b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2343b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2344b411b363SPhilipp Reisner  *
2345b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2346b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2347b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2348b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2349b411b363SPhilipp Reisner  *
2350b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2351b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Packet ordering only matters on the node resolving conflicts. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* In sequence (or older): record it and let the caller proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* drop the lock while sleeping; update_peer_seq() wakes us */
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2399b411b363SPhilipp Reisner 
/* Map wire data-packet flags to the corresponding block-layer write op.
 * DP_ZEROES takes precedence over DP_DISCARD; plain write otherwise. */
static enum req_op wire_flags_to_bio_op(u32 dpf)
{
	if (dpf & DP_ZEROES)
		return REQ_OP_WRITE_ZEROES;
	if (dpf & DP_DISCARD)
		return REQ_OP_DISCARD;
	return REQ_OP_WRITE;
}
240976d2e7ecSPhilipp Reisner 
2410ce668b6dSChristoph Böhmwalder /* see also bio_flags_to_wire() */
wire_flags_to_bio(struct drbd_connection * connection,u32 dpf)2411ce668b6dSChristoph Böhmwalder static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
2412ce668b6dSChristoph Böhmwalder {
2413ce668b6dSChristoph Böhmwalder 	return wire_flags_to_bio_op(dpf) |
2414ce668b6dSChristoph Böhmwalder 		(dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2415ce668b6dSChristoph Böhmwalder 		(dpf & DP_FUA ? REQ_FUA : 0) |
2416ce668b6dSChristoph Böhmwalder 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2417ce668b6dSChristoph Böhmwalder }
2418ce668b6dSChristoph Böhmwalder 
/*
 * Fail (NEG_ACK) all postponed local requests overlapping
 * [sector, sector + size).
 *
 * Caller must hold device->resource->req_lock.  The lock is dropped
 * and re-acquired around complete_master_bio(), so the interval tree
 * may have changed; hence the restart from the top after each hit.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, peer_device, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
24447be8da07SAndreas Gruenbacher 
/*
 * Resolve conflicts between an incoming peer write and overlapping
 * local (or remote) requests in the write_requests interval tree.
 *
 * Caller must hold device->resource->req_lock.  The peer request's
 * interval is inserted into the tree; on error it is removed again.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it has
 * already been answered (superseded / retry queued on done_ee) and
 * must not be submitted, or another error from drbd_wait_misc().
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* skip ourselves and already-completed intervals */
		if (i == &peer_req->i)
			continue;
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* queue the answer; the ack_sender sends it from done_ee */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25527be8da07SAndreas Gruenbacher 
2553b411b363SPhilipp Reisner /* mirrored write */
receive_Data(struct drbd_connection * connection,struct packet_info * pi)2554bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
2555b411b363SPhilipp Reisner {
25569f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2557b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
255821ae5d7fSLars Ellenberg 	struct net_conf *nc;
2559b411b363SPhilipp Reisner 	sector_t sector;
2560db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
2561e658983aSAndreas Gruenbacher 	struct p_data *p = pi->data;
25627be8da07SAndreas Gruenbacher 	u32 peer_seq = be32_to_cpu(p->seq_num);
2563b411b363SPhilipp Reisner 	u32 dp_flags;
2564302bdeaeSPhilipp Reisner 	int err, tp;
25657be8da07SAndreas Gruenbacher 
25669f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
25679f4fe9adSAndreas Gruenbacher 	if (!peer_device)
25684a76b161SAndreas Gruenbacher 		return -EIO;
25699f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
2570b411b363SPhilipp Reisner 
2571b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
257282bc0194SAndreas Gruenbacher 		int err2;
2573b411b363SPhilipp Reisner 
257469a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
257569a22773SAndreas Gruenbacher 		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2576bde89a9eSAndreas Gruenbacher 		atomic_inc(&connection->current_epoch->epoch_size);
257769a22773SAndreas Gruenbacher 		err2 = drbd_drain_block(peer_device, pi->size);
257882bc0194SAndreas Gruenbacher 		if (!err)
257982bc0194SAndreas Gruenbacher 			err = err2;
258082bc0194SAndreas Gruenbacher 		return err;
2581b411b363SPhilipp Reisner 	}
2582b411b363SPhilipp Reisner 
2583fcefa62eSAndreas Gruenbacher 	/*
2584fcefa62eSAndreas Gruenbacher 	 * Corresponding put_ldev done either below (on various errors), or in
2585fcefa62eSAndreas Gruenbacher 	 * drbd_peer_request_endio, if we successfully submit the data at the
2586fcefa62eSAndreas Gruenbacher 	 * end of this function.
2587fcefa62eSAndreas Gruenbacher 	 */
2588b411b363SPhilipp Reisner 
2589b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
2590a0fb3c47SLars Ellenberg 	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
2591db830c46SAndreas Gruenbacher 	if (!peer_req) {
2592b30ab791SAndreas Gruenbacher 		put_ldev(device);
259382bc0194SAndreas Gruenbacher 		return -EIO;
2594b411b363SPhilipp Reisner 	}
2595b411b363SPhilipp Reisner 
2596a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = e_end_block;
259721ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
259821ae5d7fSLars Ellenberg 	peer_req->flags |= EE_APPLICATION;
2599b411b363SPhilipp Reisner 
2600688593c5SLars Ellenberg 	dp_flags = be32_to_cpu(p->dp_flags);
2601ce668b6dSChristoph Böhmwalder 	peer_req->opf = wire_flags_to_bio(connection, dp_flags);
2602a0fb3c47SLars Ellenberg 	if (pi->cmd == P_TRIM) {
2603a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->i.size > 0);
2604ce668b6dSChristoph Böhmwalder 		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
2605f31e583aSLars Ellenberg 		D_ASSERT(peer_device, peer_req->pages == NULL);
2606f31e583aSLars Ellenberg 		/* need to play safe: an older DRBD sender
2607f31e583aSLars Ellenberg 		 * may mean zero-out while sending P_TRIM. */
2608f31e583aSLars Ellenberg 		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
2609f31e583aSLars Ellenberg 			peer_req->flags |= EE_ZEROOUT;
2610f31e583aSLars Ellenberg 	} else if (pi->cmd == P_ZEROES) {
2611f31e583aSLars Ellenberg 		D_ASSERT(peer_device, peer_req->i.size > 0);
2612ce668b6dSChristoph Böhmwalder 		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
2613a0fb3c47SLars Ellenberg 		D_ASSERT(peer_device, peer_req->pages == NULL);
2614f31e583aSLars Ellenberg 		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
2615f31e583aSLars Ellenberg 		if (dp_flags & DP_DISCARD)
2616f31e583aSLars Ellenberg 			peer_req->flags |= EE_TRIM;
2617a0fb3c47SLars Ellenberg 	} else if (peer_req->pages == NULL) {
26180b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, peer_req->i.size == 0);
26190b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, dp_flags & DP_FLUSH);
2620a73ff323SLars Ellenberg 	}
2621688593c5SLars Ellenberg 
2622688593c5SLars Ellenberg 	if (dp_flags & DP_MAY_SET_IN_SYNC)
2623db830c46SAndreas Gruenbacher 		peer_req->flags |= EE_MAY_SET_IN_SYNC;
2624688593c5SLars Ellenberg 
2625bde89a9eSAndreas Gruenbacher 	spin_lock(&connection->epoch_lock);
2626bde89a9eSAndreas Gruenbacher 	peer_req->epoch = connection->current_epoch;
2627db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->epoch_size);
2628db830c46SAndreas Gruenbacher 	atomic_inc(&peer_req->epoch->active);
2629bde89a9eSAndreas Gruenbacher 	spin_unlock(&connection->epoch_lock);
2630b411b363SPhilipp Reisner 
2631302bdeaeSPhilipp Reisner 	rcu_read_lock();
263221ae5d7fSLars Ellenberg 	nc = rcu_dereference(peer_device->connection->net_conf);
263321ae5d7fSLars Ellenberg 	tp = nc->two_primaries;
263421ae5d7fSLars Ellenberg 	if (peer_device->connection->agreed_pro_version < 100) {
263521ae5d7fSLars Ellenberg 		switch (nc->wire_protocol) {
263621ae5d7fSLars Ellenberg 		case DRBD_PROT_C:
263721ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_WRITE_ACK;
263821ae5d7fSLars Ellenberg 			break;
263921ae5d7fSLars Ellenberg 		case DRBD_PROT_B:
264021ae5d7fSLars Ellenberg 			dp_flags |= DP_SEND_RECEIVE_ACK;
264121ae5d7fSLars Ellenberg 			break;
264221ae5d7fSLars Ellenberg 		}
264321ae5d7fSLars Ellenberg 	}
2644302bdeaeSPhilipp Reisner 	rcu_read_unlock();
264521ae5d7fSLars Ellenberg 
264621ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_WRITE_ACK) {
264721ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SEND_WRITE_ACK;
264821ae5d7fSLars Ellenberg 		inc_unacked(device);
264921ae5d7fSLars Ellenberg 		/* corresponding dec_unacked() in e_end_block()
265021ae5d7fSLars Ellenberg 		 * respective _drbd_clear_done_ee */
265121ae5d7fSLars Ellenberg 	}
265221ae5d7fSLars Ellenberg 
265321ae5d7fSLars Ellenberg 	if (dp_flags & DP_SEND_RECEIVE_ACK) {
265421ae5d7fSLars Ellenberg 		/* I really don't like it that the receiver thread
265521ae5d7fSLars Ellenberg 		 * sends on the msock, but anyways */
26565dd2ca19SAndreas Gruenbacher 		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
265721ae5d7fSLars Ellenberg 	}
265821ae5d7fSLars Ellenberg 
2659302bdeaeSPhilipp Reisner 	if (tp) {
266021ae5d7fSLars Ellenberg 		/* two primaries implies protocol C */
266121ae5d7fSLars Ellenberg 		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
2662302bdeaeSPhilipp Reisner 		peer_req->flags |= EE_IN_INTERVAL_TREE;
266369a22773SAndreas Gruenbacher 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
26647be8da07SAndreas Gruenbacher 		if (err)
2665b411b363SPhilipp Reisner 			goto out_interrupted;
26660500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
2667b30ab791SAndreas Gruenbacher 		err = handle_write_conflicts(device, peer_req);
26687be8da07SAndreas Gruenbacher 		if (err) {
26690500813fSAndreas Gruenbacher 			spin_unlock_irq(&device->resource->req_lock);
26707be8da07SAndreas Gruenbacher 			if (err == -ENOENT) {
2671b30ab791SAndreas Gruenbacher 				put_ldev(device);
267282bc0194SAndreas Gruenbacher 				return 0;
2673b411b363SPhilipp Reisner 			}
2674b411b363SPhilipp Reisner 			goto out_interrupted;
2675b411b363SPhilipp Reisner 		}
2676b874d231SPhilipp Reisner 	} else {
267769a22773SAndreas Gruenbacher 		update_peer_seq(peer_device, peer_seq);
26780500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
2679b874d231SPhilipp Reisner 	}
	/* TRIM is processed synchronously,
26819104d31aSLars Ellenberg 	 * we wait for all pending requests, respectively wait for
2682a0fb3c47SLars Ellenberg 	 * active_ee to become empty in drbd_submit_peer_request();
2683a0fb3c47SLars Ellenberg 	 * better not add ourselves here. */
2684a34592ffSChristoph Hellwig 	if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
2685b9ed7080SLars Ellenberg 		list_add_tail(&peer_req->w.list, &device->active_ee);
26860500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2687b411b363SPhilipp Reisner 
2688b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_SYNC_TARGET)
2689b30ab791SAndreas Gruenbacher 		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2690b6a370baSPhilipp Reisner 
2691b30ab791SAndreas Gruenbacher 	if (device->state.pdsk < D_INCONSISTENT) {
2692b411b363SPhilipp Reisner 		/* In case we have the only disk of the cluster, */
26930d11f3cfSChristoph Böhmwalder 		drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
2694db830c46SAndreas Gruenbacher 		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
26954dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &peer_req->i);
269621ae5d7fSLars Ellenberg 		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2697b411b363SPhilipp Reisner 	}
2698b411b363SPhilipp Reisner 
2699ce668b6dSChristoph Böhmwalder 	err = drbd_submit_peer_request(peer_req);
270082bc0194SAndreas Gruenbacher 	if (!err)
270182bc0194SAndreas Gruenbacher 		return 0;
2702b411b363SPhilipp Reisner 
270310f6d992SLars Ellenberg 	/* don't care for the reason here */
2704d0180171SAndreas Gruenbacher 	drbd_err(device, "submit failed, triggering re-connect\n");
27050500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2706a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
2707b30ab791SAndreas Gruenbacher 	drbd_remove_epoch_entry_interval(device, peer_req);
27080500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
270921ae5d7fSLars Ellenberg 	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
271021ae5d7fSLars Ellenberg 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2711b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &peer_req->i);
271221ae5d7fSLars Ellenberg 	}
271322cc37a9SLars Ellenberg 
2714b411b363SPhilipp Reisner out_interrupted:
27157e5fec31SFabian Frederick 	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
2716b30ab791SAndreas Gruenbacher 	put_ldev(device);
2717b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
271882bc0194SAndreas Gruenbacher 	return err;
2719b411b363SPhilipp Reisner }
2720b411b363SPhilipp Reisner 
27210f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27220f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27230f0601f4SLars Ellenberg  *
27240f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27250f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27260f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27270f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27280f0601f4SLars Ellenberg  *
27290f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27300f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27310f0601f4SLars Ellenberg  */
drbd_rs_should_slow_down(struct drbd_peer_device * peer_device,sector_t sector,bool throttle_if_app_is_waiting)27320d11f3cfSChristoph Böhmwalder bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
2733ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2734e8299874SLars Ellenberg {
27350d11f3cfSChristoph Böhmwalder 	struct drbd_device *device = peer_device->device;
2736e8299874SLars Ellenberg 	struct lc_element *tmp;
2737ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2738e8299874SLars Ellenberg 
2739ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2740ad3fee79SLars Ellenberg 		return throttle;
2741e8299874SLars Ellenberg 
2742e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2743e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2744e8299874SLars Ellenberg 	if (tmp) {
2745e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2746e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2747e8299874SLars Ellenberg 			throttle = false;
2748ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2749ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2750e8299874SLars Ellenberg 	}
2751e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2752e8299874SLars Ellenberg 
2753e8299874SLars Ellenberg 	return throttle;
2754e8299874SLars Ellenberg }
2755e8299874SLars Ellenberg 
drbd_rs_c_min_rate_throttle(struct drbd_device * device)2756e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27570f0601f4SLars Ellenberg {
27588c40c7c4SChristoph Hellwig 	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27590f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2760daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2761e8299874SLars Ellenberg 	int curr_events;
2762daeda1ccSPhilipp Reisner 
2763daeda1ccSPhilipp Reisner 	rcu_read_lock();
2764b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2765daeda1ccSPhilipp Reisner 	rcu_read_unlock();
27660f0601f4SLars Ellenberg 
27670f0601f4SLars Ellenberg 	/* feature disabled? */
2768daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2769e8299874SLars Ellenberg 		return false;
2770e3555d85SPhilipp Reisner 
27718446fe92SChristoph Hellwig 	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2772b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2773ad3fee79SLars Ellenberg 
2774ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2775ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
27760f0601f4SLars Ellenberg 		unsigned long rs_left;
27770f0601f4SLars Ellenberg 		int i;
27780f0601f4SLars Ellenberg 
2779b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
27800f0601f4SLars Ellenberg 
27810f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
27820f0601f4SLars Ellenberg 		 * approx. */
2783b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
27842649f080SLars Ellenberg 
2785b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2786b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
27872649f080SLars Ellenberg 		else
2788b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
27890f0601f4SLars Ellenberg 
2790b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
27910f0601f4SLars Ellenberg 		if (!dt)
27920f0601f4SLars Ellenberg 			dt++;
2793b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
27940f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
27950f0601f4SLars Ellenberg 
2796daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2797e8299874SLars Ellenberg 			return true;
27980f0601f4SLars Ellenberg 	}
2799e8299874SLars Ellenberg 	return false;
28000f0601f4SLars Ellenberg }
28010f0601f4SLars Ellenberg 
/*
 * receive_DataRequest() - serve a read-type request from the peer
 *
 * Handles P_DATA_REQUEST (application read on behalf of the peer), the
 * resync read variants (P_RS_THIN_REQ, P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST)
 * and the online-verify packets (P_OV_REQUEST, P_OV_REPLY): validates the
 * requested sector/size, allocates a peer request, submits a local read,
 * and lets the per-command completion callback (peer_req->w.cb) send the
 * reply.
 *
 * Returns 0 on success; a negative error code causes a re-connect.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = get_capacity(device->vdisk);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Reject malformed requests: non-positive, not 512-byte aligned,
	 * larger than a single DRBD bio, or reaching past the device end. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: send the appropriate negative ack and
	 * drain any payload still on the wire. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_THIN_REQ:
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(peer_device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && drbd_ratelimit())
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}
	peer_req->opf = REQ_OP_READ;

	/* Per-command setup: pick the completion callback and flags. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_THIN_REQ:
		/* If at some point in the future we have a smart way to
		   find out if this data block is completely deallocated,
		   then we would do something smarter here than reading
		   the block... */
		peer_req->flags |= EE_RS_THIN_REQ;
		fallthrough;
	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* These carry a digest as payload; store it right behind the
		 * digest_info header in one allocation. */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(peer_device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First verify request of a run: initialize the online-verify
		 * bookkeeping (progress marks, totals). */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	&& drbd_rs_should_slow_down(peer_device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	/* Error path: unhook the request from read_ee (safe even if it was
	 * never added; list_del on an initialized node) and free it. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
3009b411b363SPhilipp Reisner 
/*
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.
 *
 * Return: 1 if the peer's data should be discarded, -1 if the local data
 * should be discarded (per the ASB_DISCARD_REMOTE/ASB_DISCARD_LOCAL
 * mappings below), -100 if no automatic decision was reached.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* Low bit of each side's bitmap UUID.
	 * NOTE(review): presumably set while the respective node was primary;
	 * used by the younger/older-primary policies below -- confirm against
	 * the UUID handling code. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* Per-side change counts used by the "least/zero changes" policies.
	 * NOTE(review): peer's count arrives in p_uuid[UI_SIZE]; ch_self is
	 * taken from comm_bm_set -- verify units match. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	/* These policies are only meaningful with at least one primary. */
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Note: after this fallthrough the OLDER_PRI checks cannot
		 * match either (same self/peer conditions, opposite results),
		 * so control proceeds to the change-count based policies. */
		fallthrough;	/* to one of the other strategies */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		fallthrough;
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Both unchanged: break the tie deterministically via
			 * the RESOLVE_CONFLICTS flag. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* Only keep falling through when we got here from the
		 * younger/older-primary policies above. */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		fallthrough;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
3092b411b363SPhilipp Reisner 
/*
 * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 *
 * Applies the configured after-sb-1pri policy; most strategies delegate the
 * basic decision to drbd_asb_recover_0p() and then filter or enforce it
 * based on the local role.
 *
 * Return: 1 to discard the peer's data, -1 to discard the local data,
 * -100 if no automatic decision was reached.
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	/* These policies are only valid for after-sb-0pri. */
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* Only accept the 0p decision if it agrees with our role,
		 * i.e. the losing side is the secondary. */
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1  && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			/* We would lose as primary: try to demote ourselves;
			 * only run the pri-lost-after-sb helper if that fails. */
			enum drbd_state_rv rv2;

			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			  * we might be here in C_WF_REPORT_PARAMS which is transient.
			  * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
3149b411b363SPhilipp Reisner 
31509b48ff07SLee Jones /*
315169a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
315269a22773SAndreas Gruenbacher  */
drbd_asb_recover_2p(struct drbd_peer_device * peer_device)315369a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3154b411b363SPhilipp Reisner {
315569a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31566184ea21SAndreas Gruenbacher 	int hg, rv = -100;
315744ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3158b411b363SPhilipp Reisner 
315944ed167dSPhilipp Reisner 	rcu_read_lock();
316069a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
316144ed167dSPhilipp Reisner 	rcu_read_unlock();
316244ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3163b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3164b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3165b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3166b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3167b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3168b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3169b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
317044ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3171d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3172b411b363SPhilipp Reisner 		break;
3173b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
317469a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3175b411b363SPhilipp Reisner 		break;
3176b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3177b411b363SPhilipp Reisner 		break;
3178b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
317969a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3180b411b363SPhilipp Reisner 		if (hg == -1) {
3181bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3182bb437946SAndreas Gruenbacher 
3183b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3184b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3185b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3186b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3187bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3188b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3189b411b363SPhilipp Reisner 			} else {
3190d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3191b411b363SPhilipp Reisner 				rv = hg;
3192b411b363SPhilipp Reisner 			}
3193b411b363SPhilipp Reisner 		} else
3194b411b363SPhilipp Reisner 			rv = hg;
3195b411b363SPhilipp Reisner 	}
3196b411b363SPhilipp Reisner 
3197b411b363SPhilipp Reisner 	return rv;
3198b411b363SPhilipp Reisner }
3199b411b363SPhilipp Reisner 
/*
 * Log one generation-UUID set (current/bitmap/history) together with the
 * dirty-bit count and flags, prefixed by @text ("self" or "peer").
 */
static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	/* The peer's uuid array can be freed concurrently; just note that. */
	if (uuid == NULL) {
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
		return;
	}

	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
	     text,
	     (unsigned long long)uuid[UI_CURRENT],
	     (unsigned long long)uuid[UI_BITMAP],
	     (unsigned long long)uuid[UI_HISTORY_START],
	     (unsigned long long)uuid[UI_HISTORY_END],
	     (unsigned long long)bits,
	     (unsigned long long)flags);
}
3216b411b363SPhilipp Reisner 
3217b411b363SPhilipp Reisner /*
3218b411b363SPhilipp Reisner   100	after split brain try auto recover
3219b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3220b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3221b411b363SPhilipp Reisner     0	no Sync
3222b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3223b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3224b411b363SPhilipp Reisner  -100	after split brain, disconnect
3225b411b363SPhilipp Reisner -1000	unrelated data
32264a23f264SPhilipp Reisner -1091   requires proto 91
32274a23f264SPhilipp Reisner -1096   requires proto 96
3228b411b363SPhilipp Reisner  */
3229f2d3d75bSLars Ellenberg 
drbd_uuid_compare(struct drbd_peer_device * const peer_device,enum drbd_role const peer_role,int * rule_nr)3230db445db1SChristoph Böhmwalder static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
3231db445db1SChristoph Böhmwalder 		enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3232b411b363SPhilipp Reisner {
3233db445db1SChristoph Böhmwalder 	struct drbd_connection *const connection = peer_device->connection;
3234db445db1SChristoph Böhmwalder 	struct drbd_device *device = peer_device->device;
3235b411b363SPhilipp Reisner 	u64 self, peer;
3236b411b363SPhilipp Reisner 	int i, j;
3237b411b363SPhilipp Reisner 
3238b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3239b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3240b411b363SPhilipp Reisner 
3241b411b363SPhilipp Reisner 	*rule_nr = 10;
3242b411b363SPhilipp Reisner 	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3243b411b363SPhilipp Reisner 		return 0;
3244b411b363SPhilipp Reisner 
3245b411b363SPhilipp Reisner 	*rule_nr = 20;
3246b411b363SPhilipp Reisner 	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3247b411b363SPhilipp Reisner 	     peer != UUID_JUST_CREATED)
3248b411b363SPhilipp Reisner 		return -2;
3249b411b363SPhilipp Reisner 
3250b411b363SPhilipp Reisner 	*rule_nr = 30;
3251b411b363SPhilipp Reisner 	if (self != UUID_JUST_CREATED &&
3252b411b363SPhilipp Reisner 	    (peer == UUID_JUST_CREATED || peer == (u64)0))
3253b411b363SPhilipp Reisner 		return 2;
3254b411b363SPhilipp Reisner 
3255b411b363SPhilipp Reisner 	if (self == peer) {
3256b411b363SPhilipp Reisner 		int rct, dc; /* roles at crash time */
3257b411b363SPhilipp Reisner 
3258b30ab791SAndreas Gruenbacher 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3259b411b363SPhilipp Reisner 
326044a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32614a23f264SPhilipp Reisner 				return -1091;
3262b411b363SPhilipp Reisner 
3263b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3264b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3265d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3266b30ab791SAndreas Gruenbacher 				drbd_uuid_move_history(device);
3267b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3268b30ab791SAndreas Gruenbacher 				device->ldev->md.uuid[UI_BITMAP] = 0;
3269b411b363SPhilipp Reisner 
3270b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3271b30ab791SAndreas Gruenbacher 					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3272b411b363SPhilipp Reisner 				*rule_nr = 34;
3273b411b363SPhilipp Reisner 			} else {
3274d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3275b411b363SPhilipp Reisner 				*rule_nr = 36;
3276b411b363SPhilipp Reisner 			}
3277b411b363SPhilipp Reisner 
3278b411b363SPhilipp Reisner 			return 1;
3279b411b363SPhilipp Reisner 		}
3280b411b363SPhilipp Reisner 
3281b30ab791SAndreas Gruenbacher 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3282b411b363SPhilipp Reisner 
328344a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
32844a23f264SPhilipp Reisner 				return -1091;
3285b411b363SPhilipp Reisner 
3286b30ab791SAndreas Gruenbacher 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3287b30ab791SAndreas Gruenbacher 			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3288d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3289b411b363SPhilipp Reisner 
3290b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3291b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3292b30ab791SAndreas Gruenbacher 				device->p_uuid[UI_BITMAP] = 0UL;
3293b411b363SPhilipp Reisner 
3294b30ab791SAndreas Gruenbacher 				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3295b411b363SPhilipp Reisner 				*rule_nr = 35;
3296b411b363SPhilipp Reisner 			} else {
3297d0180171SAndreas Gruenbacher 				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3298b411b363SPhilipp Reisner 				*rule_nr = 37;
3299b411b363SPhilipp Reisner 			}
3300b411b363SPhilipp Reisner 
3301b411b363SPhilipp Reisner 			return -1;
3302b411b363SPhilipp Reisner 		}
3303b411b363SPhilipp Reisner 
3304b411b363SPhilipp Reisner 		/* Common power [off|failure] */
3305b30ab791SAndreas Gruenbacher 		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3306b30ab791SAndreas Gruenbacher 			(device->p_uuid[UI_FLAGS] & 2);
3307b411b363SPhilipp Reisner 		/* lowest bit is set when we were primary,
3308b411b363SPhilipp Reisner 		 * next bit (weight 2) is set when peer was primary */
3309b411b363SPhilipp Reisner 		*rule_nr = 40;
3310b411b363SPhilipp Reisner 
3311f2d3d75bSLars Ellenberg 		/* Neither has the "crashed primary" flag set,
3312f2d3d75bSLars Ellenberg 		 * only a replication link hickup. */
3313f2d3d75bSLars Ellenberg 		if (rct == 0)
3314f2d3d75bSLars Ellenberg 			return 0;
3315f2d3d75bSLars Ellenberg 
3316f2d3d75bSLars Ellenberg 		/* Current UUID equal and no bitmap uuid; does not necessarily
3317f2d3d75bSLars Ellenberg 		 * mean this was a "simultaneous hard crash", maybe IO was
3318f2d3d75bSLars Ellenberg 		 * frozen, so no UUID-bump happened.
3319f2d3d75bSLars Ellenberg 		 * This is a protocol change, overload DRBD_FF_WSAME as flag
3320f2d3d75bSLars Ellenberg 		 * for "new-enough" peer DRBD version. */
3321f2d3d75bSLars Ellenberg 		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3322f2d3d75bSLars Ellenberg 			*rule_nr = 41;
3323f2d3d75bSLars Ellenberg 			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3324f2d3d75bSLars Ellenberg 				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3325f2d3d75bSLars Ellenberg 				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3326f2d3d75bSLars Ellenberg 			}
3327f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3328f2d3d75bSLars Ellenberg 				/* At least one has the "crashed primary" bit set,
3329f2d3d75bSLars Ellenberg 				 * both are primary now, but neither has rotated its UUIDs?
3330f2d3d75bSLars Ellenberg 				 * "Can not happen." */
3331f2d3d75bSLars Ellenberg 				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3332f2d3d75bSLars Ellenberg 				return -100;
3333f2d3d75bSLars Ellenberg 			}
3334f2d3d75bSLars Ellenberg 			if (device->state.role == R_PRIMARY)
3335f2d3d75bSLars Ellenberg 				return 1;
3336f2d3d75bSLars Ellenberg 			return -1;
3337f2d3d75bSLars Ellenberg 		}
3338f2d3d75bSLars Ellenberg 
3339f2d3d75bSLars Ellenberg 		/* Both are secondary.
3340f2d3d75bSLars Ellenberg 		 * Really looks like recovery from simultaneous hard crash.
3341f2d3d75bSLars Ellenberg 		 * Check which had been primary before, and arbitrate. */
3342b411b363SPhilipp Reisner 		switch (rct) {
3343f2d3d75bSLars Ellenberg 		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3344b411b363SPhilipp Reisner 		case 1: /*  self_pri && !peer_pri */ return 1;
3345b411b363SPhilipp Reisner 		case 2: /* !self_pri &&  peer_pri */ return -1;
3346b411b363SPhilipp Reisner 		case 3: /*  self_pri &&  peer_pri */
334744a4d551SLars Ellenberg 			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3348b411b363SPhilipp Reisner 			return dc ? -1 : 1;
3349b411b363SPhilipp Reisner 		}
3350b411b363SPhilipp Reisner 	}
3351b411b363SPhilipp Reisner 
3352b411b363SPhilipp Reisner 	*rule_nr = 50;
3353b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3354b411b363SPhilipp Reisner 	if (self == peer)
3355b411b363SPhilipp Reisner 		return -1;
3356b411b363SPhilipp Reisner 
3357b411b363SPhilipp Reisner 	*rule_nr = 51;
3358b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3359b411b363SPhilipp Reisner 	if (self == peer) {
336044a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3361b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3362b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3363b30ab791SAndreas Gruenbacher 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3364b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3365b411b363SPhilipp Reisner 			   resync as sync source modifications of the peer's UUIDs. */
3366b411b363SPhilipp Reisner 
336744a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
33684a23f264SPhilipp Reisner 				return -1091;
3369b411b363SPhilipp Reisner 
3370b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3371b30ab791SAndreas Gruenbacher 			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
33724a23f264SPhilipp Reisner 
3373d0180171SAndreas Gruenbacher 			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3374b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
33754a23f264SPhilipp Reisner 
3376b411b363SPhilipp Reisner 			return -1;
3377b411b363SPhilipp Reisner 		}
3378b411b363SPhilipp Reisner 	}
3379b411b363SPhilipp Reisner 
3380b411b363SPhilipp Reisner 	*rule_nr = 60;
3381b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3382b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3383b30ab791SAndreas Gruenbacher 		peer = device->p_uuid[i] & ~((u64)1);
3384b411b363SPhilipp Reisner 		if (self == peer)
3385b411b363SPhilipp Reisner 			return -2;
3386b411b363SPhilipp Reisner 	}
3387b411b363SPhilipp Reisner 
3388b411b363SPhilipp Reisner 	*rule_nr = 70;
3389b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3390b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3391b411b363SPhilipp Reisner 	if (self == peer)
3392b411b363SPhilipp Reisner 		return 1;
3393b411b363SPhilipp Reisner 
3394b411b363SPhilipp Reisner 	*rule_nr = 71;
3395b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3396b411b363SPhilipp Reisner 	if (self == peer) {
339744a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 96 ?
3398b30ab791SAndreas Gruenbacher 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3399b30ab791SAndreas Gruenbacher 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3400b30ab791SAndreas Gruenbacher 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3401b411b363SPhilipp Reisner 			/* The last P_SYNC_UUID did not get though. Undo the last start of
3402b411b363SPhilipp Reisner 			   resync as sync source modifications of our UUIDs. */
3403b411b363SPhilipp Reisner 
340444a4d551SLars Ellenberg 			if (connection->agreed_pro_version < 91)
34054a23f264SPhilipp Reisner 				return -1091;
3406b411b363SPhilipp Reisner 
3407b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3408b30ab791SAndreas Gruenbacher 			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3409b411b363SPhilipp Reisner 
3410d0180171SAndreas Gruenbacher 			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3411b30ab791SAndreas Gruenbacher 			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3412b30ab791SAndreas Gruenbacher 				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3413b411b363SPhilipp Reisner 
3414b411b363SPhilipp Reisner 			return 1;
3415b411b363SPhilipp Reisner 		}
3416b411b363SPhilipp Reisner 	}
3417b411b363SPhilipp Reisner 
3418b411b363SPhilipp Reisner 
3419b411b363SPhilipp Reisner 	*rule_nr = 80;
3420b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3421b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3422b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3423b411b363SPhilipp Reisner 		if (self == peer)
3424b411b363SPhilipp Reisner 			return 2;
3425b411b363SPhilipp Reisner 	}
3426b411b363SPhilipp Reisner 
3427b411b363SPhilipp Reisner 	*rule_nr = 90;
3428b30ab791SAndreas Gruenbacher 	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3429b30ab791SAndreas Gruenbacher 	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3430b411b363SPhilipp Reisner 	if (self == peer && self != ((u64)0))
3431b411b363SPhilipp Reisner 		return 100;
3432b411b363SPhilipp Reisner 
3433b411b363SPhilipp Reisner 	*rule_nr = 100;
3434b411b363SPhilipp Reisner 	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3435b30ab791SAndreas Gruenbacher 		self = device->ldev->md.uuid[i] & ~((u64)1);
3436b411b363SPhilipp Reisner 		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3437b30ab791SAndreas Gruenbacher 			peer = device->p_uuid[j] & ~((u64)1);
3438b411b363SPhilipp Reisner 			if (self == peer)
3439b411b363SPhilipp Reisner 				return -100;
3440b411b363SPhilipp Reisner 		}
3441b411b363SPhilipp Reisner 	}
3442b411b363SPhilipp Reisner 
3443b411b363SPhilipp Reisner 	return -1000;
3444b411b363SPhilipp Reisner }
3445b411b363SPhilipp Reisner 
3446b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3447b411b363SPhilipp Reisner    CONN_MASK (-1) on failure.
3448b411b363SPhilipp Reisner  */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	/* While attaching we negotiate the disk state; use the pending one. */
	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* Dump both UUID sets and compare them under the uuid_lock, since
	 * drbd_uuid_compare() may rewrite either set in place. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	/* -1000: no common ancestor, refuse to talk to this peer. */
	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* hg < -0x10000 encodes "need protocol X plus feature flags Y"
	 * (see rule 41 in drbd_uuid_compare()). */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* -1091/-1096 etc.: peer's protocol version is too old. */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side is D_INCONSISTENT, the consistent side must be
	 * the sync source regardless of the UUID verdict; keep a full sync
	 * (|hg| == 2) if the UUIDs also demanded one or were ambiguous. */
	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	/* |hg| == 100: split brain detected; notify the admin helper. */
	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	/* Copy the net_conf knobs we need out of the RCU-protected struct. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* Try automatic split-brain recovery, picking the policy by the
	 * number of current primaries (after-sb-0pri/1pri/2pri). */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Manual resolution: exactly one side configured discard-my-data. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	/* Cannot serve as sync source with inconsistent local data. */
	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* We would become sync target while being primary with usable data:
	 * apply the configured rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			fallthrough;
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* dry-run: report what would happen, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| >= 2 means full sync: mark every bit out-of-sync first. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED, NULL))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		/* hg == 0 with a non-empty bitmap is unexpected; log it. */
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3614b411b363SPhilipp Reisner 
convert_after_sb(enum drbd_after_sb_p peer)3615f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3616b411b363SPhilipp Reisner {
3617b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3618f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3619f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3620b411b363SPhilipp Reisner 
3621b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3622f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3623f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3624b411b363SPhilipp Reisner 
3625b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3626f179d76dSPhilipp Reisner 	return peer;
3627b411b363SPhilipp Reisner }
3628b411b363SPhilipp Reisner 
receive_protocol(struct drbd_connection * connection,struct packet_info * pi)3629bde89a9eSAndreas Gruenbacher static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
3630b411b363SPhilipp Reisner {
3631e658983aSAndreas Gruenbacher 	struct p_protocol *p = pi->data;
3632036b17eaSPhilipp Reisner 	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3633036b17eaSPhilipp Reisner 	int p_proto, p_discard_my_data, p_two_primaries, cf;
3634036b17eaSPhilipp Reisner 	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3635036b17eaSPhilipp Reisner 	char integrity_alg[SHARED_SECRET_MAX] = "";
36363d0e6375SKees Cook 	struct crypto_shash *peer_integrity_tfm = NULL;
36377aca6c75SPhilipp Reisner 	void *int_dig_in = NULL, *int_dig_vv = NULL;
3638b411b363SPhilipp Reisner 
3639b411b363SPhilipp Reisner 	p_proto		= be32_to_cpu(p->protocol);
3640b411b363SPhilipp Reisner 	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
3641b411b363SPhilipp Reisner 	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
3642b411b363SPhilipp Reisner 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
3643b411b363SPhilipp Reisner 	p_two_primaries = be32_to_cpu(p->two_primaries);
3644cf14c2e9SPhilipp Reisner 	cf		= be32_to_cpu(p->conn_flags);
36456139f60dSAndreas Gruenbacher 	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
3646cf14c2e9SPhilipp Reisner 
3647bde89a9eSAndreas Gruenbacher 	if (connection->agreed_pro_version >= 87) {
364886db0618SAndreas Gruenbacher 		int err;
364986db0618SAndreas Gruenbacher 
365088104ca4SAndreas Gruenbacher 		if (pi->size > sizeof(integrity_alg))
365186db0618SAndreas Gruenbacher 			return -EIO;
3652bde89a9eSAndreas Gruenbacher 		err = drbd_recv_all(connection, integrity_alg, pi->size);
365386db0618SAndreas Gruenbacher 		if (err)
365486db0618SAndreas Gruenbacher 			return err;
365588104ca4SAndreas Gruenbacher 		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3656036b17eaSPhilipp Reisner 	}
365786db0618SAndreas Gruenbacher 
36587d4c782cSAndreas Gruenbacher 	if (pi->cmd != P_PROTOCOL_UPDATE) {
3659bde89a9eSAndreas Gruenbacher 		clear_bit(CONN_DRY_RUN, &connection->flags);
3660cf14c2e9SPhilipp Reisner 
3661cf14c2e9SPhilipp Reisner 		if (cf & CF_DRY_RUN)
3662bde89a9eSAndreas Gruenbacher 			set_bit(CONN_DRY_RUN, &connection->flags);
3663b411b363SPhilipp Reisner 
366444ed167dSPhilipp Reisner 		rcu_read_lock();
3665bde89a9eSAndreas Gruenbacher 		nc = rcu_dereference(connection->net_conf);
366644ed167dSPhilipp Reisner 
3667036b17eaSPhilipp Reisner 		if (p_proto != nc->wire_protocol) {
36681ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "protocol");
366944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3670b411b363SPhilipp Reisner 		}
3671b411b363SPhilipp Reisner 
3672f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
36731ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
367444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3675b411b363SPhilipp Reisner 		}
3676b411b363SPhilipp Reisner 
3677f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
36781ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
367944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3680b411b363SPhilipp Reisner 		}
3681b411b363SPhilipp Reisner 
3682f179d76dSPhilipp Reisner 		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
36831ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
368444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3685b411b363SPhilipp Reisner 		}
3686b411b363SPhilipp Reisner 
36876139f60dSAndreas Gruenbacher 		if (p_discard_my_data && nc->discard_my_data) {
36881ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
368944ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3690b411b363SPhilipp Reisner 		}
3691b411b363SPhilipp Reisner 
369244ed167dSPhilipp Reisner 		if (p_two_primaries != nc->two_primaries) {
36931ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
369444ed167dSPhilipp Reisner 			goto disconnect_rcu_unlock;
3695b411b363SPhilipp Reisner 		}
3696b411b363SPhilipp Reisner 
3697036b17eaSPhilipp Reisner 		if (strcmp(integrity_alg, nc->integrity_alg)) {
36981ec861ebSAndreas Gruenbacher 			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
3699036b17eaSPhilipp Reisner 			goto disconnect_rcu_unlock;
3700036b17eaSPhilipp Reisner 		}
3701036b17eaSPhilipp Reisner 
370286db0618SAndreas Gruenbacher 		rcu_read_unlock();
3703fbc12f45SAndreas Gruenbacher 	}
37047d4c782cSAndreas Gruenbacher 
37057d4c782cSAndreas Gruenbacher 	if (integrity_alg[0]) {
37067d4c782cSAndreas Gruenbacher 		int hash_size;
37077d4c782cSAndreas Gruenbacher 
37087d4c782cSAndreas Gruenbacher 		/*
37097d4c782cSAndreas Gruenbacher 		 * We can only change the peer data integrity algorithm
37107d4c782cSAndreas Gruenbacher 		 * here.  Changing our own data integrity algorithm
37117d4c782cSAndreas Gruenbacher 		 * requires that we send a P_PROTOCOL_UPDATE packet at
37127d4c782cSAndreas Gruenbacher 		 * the same time; otherwise, the peer has no way to
37137d4c782cSAndreas Gruenbacher 		 * tell between which packets the algorithm should
37147d4c782cSAndreas Gruenbacher 		 * change.
37157d4c782cSAndreas Gruenbacher 		 */
37167d4c782cSAndreas Gruenbacher 
37173d234b33SEric Biggers 		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
37181b57e663SLars Ellenberg 		if (IS_ERR(peer_integrity_tfm)) {
37191b57e663SLars Ellenberg 			peer_integrity_tfm = NULL;
37201ec861ebSAndreas Gruenbacher 			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
37217d4c782cSAndreas Gruenbacher 				 integrity_alg);
3722b411b363SPhilipp Reisner 			goto disconnect;
3723b411b363SPhilipp Reisner 		}
3724b411b363SPhilipp Reisner 
37253d0e6375SKees Cook 		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
37267d4c782cSAndreas Gruenbacher 		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
37277d4c782cSAndreas Gruenbacher 		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
37287d4c782cSAndreas Gruenbacher 		if (!(int_dig_in && int_dig_vv)) {
37291ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
37307d4c782cSAndreas Gruenbacher 			goto disconnect;
37317d4c782cSAndreas Gruenbacher 		}
37327d4c782cSAndreas Gruenbacher 	}
37337d4c782cSAndreas Gruenbacher 
37347d4c782cSAndreas Gruenbacher 	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
37358404e191SZhen Lei 	if (!new_net_conf)
3736b411b363SPhilipp Reisner 		goto disconnect;
3737b411b363SPhilipp Reisner 
3738bde89a9eSAndreas Gruenbacher 	mutex_lock(&connection->data.mutex);
37390500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
3740bde89a9eSAndreas Gruenbacher 	old_net_conf = connection->net_conf;
37417d4c782cSAndreas Gruenbacher 	*new_net_conf = *old_net_conf;
3742b411b363SPhilipp Reisner 
37437d4c782cSAndreas Gruenbacher 	new_net_conf->wire_protocol = p_proto;
37447d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
37457d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
37467d4c782cSAndreas Gruenbacher 	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
37477d4c782cSAndreas Gruenbacher 	new_net_conf->two_primaries = p_two_primaries;
3748b411b363SPhilipp Reisner 
3749bde89a9eSAndreas Gruenbacher 	rcu_assign_pointer(connection->net_conf, new_net_conf);
37500500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
3751bde89a9eSAndreas Gruenbacher 	mutex_unlock(&connection->data.mutex);
3752b411b363SPhilipp Reisner 
37533d0e6375SKees Cook 	crypto_free_shash(connection->peer_integrity_tfm);
3754bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_in);
3755bde89a9eSAndreas Gruenbacher 	kfree(connection->int_dig_vv);
3756bde89a9eSAndreas Gruenbacher 	connection->peer_integrity_tfm = peer_integrity_tfm;
3757bde89a9eSAndreas Gruenbacher 	connection->int_dig_in = int_dig_in;
3758bde89a9eSAndreas Gruenbacher 	connection->int_dig_vv = int_dig_vv;
3759b411b363SPhilipp Reisner 
37607d4c782cSAndreas Gruenbacher 	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
37611ec861ebSAndreas Gruenbacher 		drbd_info(connection, "peer data-integrity-alg: %s\n",
37627d4c782cSAndreas Gruenbacher 			  integrity_alg[0] ? integrity_alg : "(none)");
3763b411b363SPhilipp Reisner 
3764a77b2109SUladzislau Rezki (Sony) 	kvfree_rcu_mightsleep(old_net_conf);
376582bc0194SAndreas Gruenbacher 	return 0;
3766b411b363SPhilipp Reisner 
376744ed167dSPhilipp Reisner disconnect_rcu_unlock:
376844ed167dSPhilipp Reisner 	rcu_read_unlock();
3769b411b363SPhilipp Reisner disconnect:
37703d0e6375SKees Cook 	crypto_free_shash(peer_integrity_tfm);
3771036b17eaSPhilipp Reisner 	kfree(int_dig_in);
3772036b17eaSPhilipp Reisner 	kfree(int_dig_vv);
3773bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
377482bc0194SAndreas Gruenbacher 	return -EIO;
3775b411b363SPhilipp Reisner }
3776b411b363SPhilipp Reisner 
3777b411b363SPhilipp Reisner /* helper function
3778b411b363SPhilipp Reisner  * input: alg name, feature name
3779b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3780b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3781b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
drbd_crypto_alloc_digest_safe(const struct drbd_device * device,const char * alg,const char * name)37823d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
37833d0e6375SKees Cook 		const struct drbd_device *device,
3784b411b363SPhilipp Reisner 		const char *alg, const char *name)
3785b411b363SPhilipp Reisner {
37863d0e6375SKees Cook 	struct crypto_shash *tfm;
3787b411b363SPhilipp Reisner 
3788b411b363SPhilipp Reisner 	if (!alg[0])
3789b411b363SPhilipp Reisner 		return NULL;
3790b411b363SPhilipp Reisner 
37913d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3792b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3793d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3794b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3795b411b363SPhilipp Reisner 		return tfm;
3796b411b363SPhilipp Reisner 	}
3797b411b363SPhilipp Reisner 	return tfm;
3798b411b363SPhilipp Reisner }
3799b411b363SPhilipp Reisner 
ignore_remaining_packet(struct drbd_connection * connection,struct packet_info * pi)3800bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3801b411b363SPhilipp Reisner {
3802bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
38034a76b161SAndreas Gruenbacher 	int size = pi->size;
38044a76b161SAndreas Gruenbacher 
38054a76b161SAndreas Gruenbacher 	while (size) {
38064a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3807bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38084a76b161SAndreas Gruenbacher 		if (s <= 0) {
38094a76b161SAndreas Gruenbacher 			if (s < 0)
38104a76b161SAndreas Gruenbacher 				return s;
38114a76b161SAndreas Gruenbacher 			break;
38124a76b161SAndreas Gruenbacher 		}
38134a76b161SAndreas Gruenbacher 		size -= s;
38144a76b161SAndreas Gruenbacher 	}
38154a76b161SAndreas Gruenbacher 	if (size)
38164a76b161SAndreas Gruenbacher 		return -EIO;
38174a76b161SAndreas Gruenbacher 	return 0;
38184a76b161SAndreas Gruenbacher }
38194a76b161SAndreas Gruenbacher 
38204a76b161SAndreas Gruenbacher /*
38214a76b161SAndreas Gruenbacher  * config_unknown_volume  -  device configuration command for unknown volume
38224a76b161SAndreas Gruenbacher  *
38234a76b161SAndreas Gruenbacher  * When a device is added to an existing connection, the node on which the
38244a76b161SAndreas Gruenbacher  * device is added first will send configuration commands to its peer but the
38254a76b161SAndreas Gruenbacher  * peer will not know about the device yet.  It will warn and ignore these
38264a76b161SAndreas Gruenbacher  * commands.  Once the device is added on the second node, the second node will
38274a76b161SAndreas Gruenbacher  * send the same device configuration commands, but in the other direction.
38284a76b161SAndreas Gruenbacher  *
38294a76b161SAndreas Gruenbacher  * (We can also end up here if drbd is misconfigured.)
38304a76b161SAndreas Gruenbacher  */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Keep the stream in sync: consume the payload we cannot interpret. */
	return ignore_remaining_packet(connection, pi);
}
38374a76b161SAndreas Gruenbacher 
receive_SyncParam(struct drbd_connection * connection,struct packet_info * pi)3838bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38394a76b161SAndreas Gruenbacher {
38409f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3841b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3842e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3843b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38443d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38453d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38462ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3847813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3848bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3849813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38506a365874SStephen Kitt 	unsigned int fifo_size = 0;
385182bc0194SAndreas Gruenbacher 	int err;
3852b411b363SPhilipp Reisner 
38539f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38549f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3855bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38569f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3857b411b363SPhilipp Reisner 
3858b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3859b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3860b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
38618e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
38628e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3863b411b363SPhilipp Reisner 
3864e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3865d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3866e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
386782bc0194SAndreas Gruenbacher 		return -EIO;
3868b411b363SPhilipp Reisner 	}
3869b411b363SPhilipp Reisner 
3870b411b363SPhilipp Reisner 	if (apv <= 88) {
3871e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3872e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38738e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3874e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3875e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38760b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
38778e26f9ccSPhilipp Reisner 	} else {
3878e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3879e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
38800b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3881b411b363SPhilipp Reisner 	}
3882b411b363SPhilipp Reisner 
3883b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3884e658983aSAndreas Gruenbacher 	p = pi->data;
388552a0cab3SKees Cook 	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
388652a0cab3SKees Cook 	memset(&p->algs, 0, sizeof(p->algs));
3887b411b363SPhilipp Reisner 
38889f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
388982bc0194SAndreas Gruenbacher 	if (err)
389082bc0194SAndreas Gruenbacher 		return err;
3891b411b363SPhilipp Reisner 
38920500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
38939f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3894b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3895daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3896daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3897b30ab791SAndreas Gruenbacher 			put_ldev(device);
38980500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3899d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3900daeda1ccSPhilipp Reisner 			return -ENOMEM;
3901f399002eSLars Ellenberg 		}
3902b411b363SPhilipp Reisner 
3903b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3904daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3905daeda1ccSPhilipp Reisner 
39066394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3907813472ceSPhilipp Reisner 	}
3908b411b363SPhilipp Reisner 
3909b411b363SPhilipp Reisner 	if (apv >= 88) {
3910b411b363SPhilipp Reisner 		if (apv == 88) {
39115de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3912d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39135de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3914b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3915813472ceSPhilipp Reisner 				goto reconnect;
3916b411b363SPhilipp Reisner 			}
3917b411b363SPhilipp Reisner 
39189f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3919813472ceSPhilipp Reisner 			if (err)
3920813472ceSPhilipp Reisner 				goto reconnect;
3921b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3922b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39230b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3924b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3925b411b363SPhilipp Reisner 
3926b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3927b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3928b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39290b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39300b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3931b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3932b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3933b411b363SPhilipp Reisner 		}
3934b411b363SPhilipp Reisner 
39352ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3936b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3937d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39382ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3939b411b363SPhilipp Reisner 				goto disconnect;
3940b411b363SPhilipp Reisner 			}
3941b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3942b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3943b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3944b411b363SPhilipp Reisner 				verify_tfm = NULL;
3945b411b363SPhilipp Reisner 				goto disconnect;
3946b411b363SPhilipp Reisner 			}
3947b411b363SPhilipp Reisner 		}
3948b411b363SPhilipp Reisner 
39492ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3950b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3951d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39522ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3953b411b363SPhilipp Reisner 				goto disconnect;
3954b411b363SPhilipp Reisner 			}
3955b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3956b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3957b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3958b411b363SPhilipp Reisner 				csums_tfm = NULL;
3959b411b363SPhilipp Reisner 				goto disconnect;
3960b411b363SPhilipp Reisner 			}
3961b411b363SPhilipp Reisner 		}
3962b411b363SPhilipp Reisner 
3963813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
3964daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3965daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3966daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3967daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3968778f271dSPhilipp Reisner 
3969daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3970b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
3971813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
3972813472ceSPhilipp Reisner 				if (!new_plan) {
3973d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
3974b30ab791SAndreas Gruenbacher 					put_ldev(device);
3975778f271dSPhilipp Reisner 					goto disconnect;
3976778f271dSPhilipp Reisner 				}
3977778f271dSPhilipp Reisner 			}
39788e26f9ccSPhilipp Reisner 		}
3979b411b363SPhilipp Reisner 
398091fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
39812ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
39828404e191SZhen Lei 			if (!new_net_conf)
398391fd4dadSPhilipp Reisner 				goto disconnect;
398491fd4dadSPhilipp Reisner 
39852ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
398691fd4dadSPhilipp Reisner 
3987b411b363SPhilipp Reisner 			if (verify_tfm) {
39882ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
39892ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
39903d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
39919f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
3992d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3993b411b363SPhilipp Reisner 			}
3994b411b363SPhilipp Reisner 			if (csums_tfm) {
39952ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
39962ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
39973d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
39989f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
3999d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4000b411b363SPhilipp Reisner 			}
4001bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
4002778f271dSPhilipp Reisner 		}
4003b411b363SPhilipp Reisner 	}
4004b411b363SPhilipp Reisner 
4005813472ceSPhilipp Reisner 	if (new_disk_conf) {
4006b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4007b30ab791SAndreas Gruenbacher 		put_ldev(device);
4008b411b363SPhilipp Reisner 	}
4009813472ceSPhilipp Reisner 
4010813472ceSPhilipp Reisner 	if (new_plan) {
4011b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4012b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4013813472ceSPhilipp Reisner 	}
4014daeda1ccSPhilipp Reisner 
40150500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4016daeda1ccSPhilipp Reisner 	synchronize_rcu();
4017daeda1ccSPhilipp Reisner 	if (new_net_conf)
4018daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4019daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4020813472ceSPhilipp Reisner 	kfree(old_plan);
4021daeda1ccSPhilipp Reisner 
402282bc0194SAndreas Gruenbacher 	return 0;
4023b411b363SPhilipp Reisner 
4024813472ceSPhilipp Reisner reconnect:
4025813472ceSPhilipp Reisner 	if (new_disk_conf) {
4026b30ab791SAndreas Gruenbacher 		put_ldev(device);
4027813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4028813472ceSPhilipp Reisner 	}
40290500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4030813472ceSPhilipp Reisner 	return -EIO;
4031813472ceSPhilipp Reisner 
4032b411b363SPhilipp Reisner disconnect:
4033813472ceSPhilipp Reisner 	kfree(new_plan);
4034813472ceSPhilipp Reisner 	if (new_disk_conf) {
4035b30ab791SAndreas Gruenbacher 		put_ldev(device);
4036813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4037813472ceSPhilipp Reisner 	}
40380500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4039b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4040b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40413d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4042b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40433d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40449f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
404582bc0194SAndreas Gruenbacher 	return -EIO;
4046b411b363SPhilipp Reisner }
4047b411b363SPhilipp Reisner 
4048b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
warn_if_differ_considerably(struct drbd_device * device,const char * s,sector_t a,sector_t b)4049b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4050b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4051b411b363SPhilipp Reisner {
4052b411b363SPhilipp Reisner 	sector_t d;
4053b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4054b411b363SPhilipp Reisner 		return;
4055b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4056b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4057d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4058b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4059b411b363SPhilipp Reisner }
4060b411b363SPhilipp Reisner 
/*
 * receive_sizes()  -  handle a P_SIZES packet from the peer.
 *
 * Records the peer's backing-device size (device->p_size), possibly adopts
 * the peer's requested user size into a new disk_conf, re-evaluates our own
 * device size and queue parameters, and — once past the parameter exchange —
 * answers with our sizes and/or triggers a resync if something grew.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EIO when the
 * peer's size is unacceptable (the connection is then forced to
 * C_DISCONNECTING).
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* Queue limits are only present on the wire if the WSAME feature
	 * was negotiated for this connection. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = get_capacity(device->vdisk);

	/* Peer's values: backing device size, user-requested size,
	 * and currently exposed (virtual) device size. */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			/* Publish the adopted user size via a new disk_conf;
			 * the old one is RCU-freed after readers are done. */
			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			kvfree_rcu_mightsleep(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore: the peer knows nothing about sizes yet. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* Remember the backing device's size so later calls can detect a
	 * local resize (ldsc) even if nothing else changed. */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
		    ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4231b411b363SPhilipp Reisner 
receive_uuids(struct drbd_connection * connection,struct packet_info * pi)4232bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4233b411b363SPhilipp Reisner {
42349f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4235b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4236e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4237b411b363SPhilipp Reisner 	u64 *p_uuid;
423862b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4239b411b363SPhilipp Reisner 
42409f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42419f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4242bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42439f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42444a76b161SAndreas Gruenbacher 
4245365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42468404e191SZhen Lei 	if (!p_uuid)
4247063eacf8SJing Wang 		return false;
4248b411b363SPhilipp Reisner 
4249b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4250b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4251b411b363SPhilipp Reisner 
4252b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4253b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4254b411b363SPhilipp Reisner 
4255b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4256b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4257b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4258b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4259d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4260b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
42619f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
426282bc0194SAndreas Gruenbacher 		return -EIO;
4263b411b363SPhilipp Reisner 	}
4264b411b363SPhilipp Reisner 
4265b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4266b411b363SPhilipp Reisner 		int skip_initial_sync =
4267b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
42689f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4269b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4270b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4271b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4272d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4273b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
427420ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
42758164dd6cSAndreas Gruenbacher 					BM_LOCKED_TEST_ALLOWED, NULL);
4276b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4277b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4278b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4279b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4280b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
428162b0da3aSLars Ellenberg 			updated_uuids = 1;
4282b411b363SPhilipp Reisner 		}
4283b30ab791SAndreas Gruenbacher 		put_ldev(device);
4284b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4285b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
428618a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
428718a50fa2SPhilipp Reisner 		   for me. */
4288b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4289b411b363SPhilipp Reisner 	}
4290b411b363SPhilipp Reisner 
4291b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4292b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4293b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4294b411b363SPhilipp Reisner 	   new disk state... */
4295b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4296b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4297b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4298b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
429962b0da3aSLars Ellenberg 
430062b0da3aSLars Ellenberg 	if (updated_uuids)
4301b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4302b411b363SPhilipp Reisner 
430382bc0194SAndreas Gruenbacher 	return 0;
4304b411b363SPhilipp Reisner }
4305b411b363SPhilipp Reisner 
4306b411b363SPhilipp Reisner /**
4307b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4308b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4309b411b363SPhilipp Reisner  */
convert_state(union drbd_state ps)4310b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4311b411b363SPhilipp Reisner {
4312b411b363SPhilipp Reisner 	union drbd_state ms;
4313b411b363SPhilipp Reisner 
4314b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4315369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4316b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4317b411b363SPhilipp Reisner 
4318b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4319b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4320b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4321b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4322b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4323b411b363SPhilipp Reisner 	};
4324b411b363SPhilipp Reisner 
4325b411b363SPhilipp Reisner 	ms.i = ps.i;
4326b411b363SPhilipp Reisner 
4327b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4328b411b363SPhilipp Reisner 	ms.peer = ps.role;
4329b411b363SPhilipp Reisner 	ms.role = ps.peer;
4330b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4331b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4332b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4333b411b363SPhilipp Reisner 
4334b411b363SPhilipp Reisner 	return ms;
4335b411b363SPhilipp Reisner }
4336b411b363SPhilipp Reisner 
receive_req_state(struct drbd_connection * connection,struct packet_info * pi)4337bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4338b411b363SPhilipp Reisner {
43399f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4340b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4341e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4342b411b363SPhilipp Reisner 	union drbd_state mask, val;
4343bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4344b411b363SPhilipp Reisner 
43459f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43469f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43474a76b161SAndreas Gruenbacher 		return -EIO;
43489f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43494a76b161SAndreas Gruenbacher 
4350b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4351b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4352b411b363SPhilipp Reisner 
43539f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4354b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
435569a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
435682bc0194SAndreas Gruenbacher 		return 0;
4357b411b363SPhilipp Reisner 	}
4358b411b363SPhilipp Reisner 
4359b411b363SPhilipp Reisner 	mask = convert_state(mask);
4360b411b363SPhilipp Reisner 	val = convert_state(val);
4361b411b363SPhilipp Reisner 
4362b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
436369a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4364047cd4a6SPhilipp Reisner 
4365b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4366b411b363SPhilipp Reisner 
436782bc0194SAndreas Gruenbacher 	return 0;
4368b411b363SPhilipp Reisner }
4369b411b363SPhilipp Reisner 
receive_req_conn_state(struct drbd_connection * connection,struct packet_info * pi)4370bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4371b411b363SPhilipp Reisner {
4372e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4373dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4374dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4375dfafcc8aSPhilipp Reisner 
4376dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4377dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4378dfafcc8aSPhilipp Reisner 
4379bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4380bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4381bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
438282bc0194SAndreas Gruenbacher 		return 0;
4383dfafcc8aSPhilipp Reisner 	}
4384dfafcc8aSPhilipp Reisner 
4385dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4386dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4387dfafcc8aSPhilipp Reisner 
4388bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4389bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4390dfafcc8aSPhilipp Reisner 
439182bc0194SAndreas Gruenbacher 	return 0;
4392dfafcc8aSPhilipp Reisner }
4393dfafcc8aSPhilipp Reisner 
/*
 * receive_state() - process a P_STATE packet from the peer
 *
 * Translates the peer's reported state into our point of view, decides
 * whether a resync handshake is needed, and applies the resulting state
 * transition.  Returns 0 on success, a negative error code to tear down
 * the connection.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still attaching (D_NEGOTIATING), derive its
	 * effective disk state from the UUID flags it sent earlier. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* Snapshot the current state; the lock is dropped while we reason
	 * about it, and we re-check below whether it changed meanwhile. */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(peer_device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(peer_device);
		drbd_resync_finished(peer_device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids().
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK: the handshake could not agree on a resync direction */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* the state changed under us while the lock was dropped: redo the
	 * analysis with a fresh snapshot */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4590b411b363SPhilipp Reisner 
/*
 * receive_sync_uuid() - process a P_SYNC_UUID packet
 *
 * The sync source tells us the UUID to adopt for the resync that is about
 * to start.  Wait until the local state machine has settled, then take
 * over the UUID and start the resync as SyncTarget.  Returns 0; errors
 * with the local disk are only logged, not propagated.
 */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Wait until we reach a state in which this packet makes sense, or
	 * until the connection/disk is going away under us. */
	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}
4625b411b363SPhilipp Reisner 
46269b48ff07SLee Jones /*
46272c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46282c46407dSAndreas Gruenbacher  *
46292c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46302c46407dSAndreas Gruenbacher  * code upon failure.
46312c46407dSAndreas Gruenbacher  */
46322c46407dSAndreas Gruenbacher static int
receive_bitmap_plain(struct drbd_peer_device * peer_device,unsigned int size,unsigned long * p,struct bm_xfer_ctx * c)463369a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4634e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4635b411b363SPhilipp Reisner {
463650d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
463769a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4638e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
463950d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4640e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46412c46407dSAndreas Gruenbacher 	int err;
4642b411b363SPhilipp Reisner 
464350d0b1adSAndreas Gruenbacher 	if (want != size) {
464469a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46452c46407dSAndreas Gruenbacher 		return -EIO;
4646b411b363SPhilipp Reisner 	}
4647b411b363SPhilipp Reisner 	if (want == 0)
46482c46407dSAndreas Gruenbacher 		return 0;
464969a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
465082bc0194SAndreas Gruenbacher 	if (err)
46512c46407dSAndreas Gruenbacher 		return err;
4652b411b363SPhilipp Reisner 
465369a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4654b411b363SPhilipp Reisner 
4655b411b363SPhilipp Reisner 	c->word_offset += num_words;
4656b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4657b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4658b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4659b411b363SPhilipp Reisner 
46602c46407dSAndreas Gruenbacher 	return 1;
4661b411b363SPhilipp Reisner }
4662b411b363SPhilipp Reisner 
/* Low nibble of the encoding byte selects the bitmap encoding variant. */
static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}
4667a02d1240SAndreas Gruenbacher 
dcbp_get_start(struct p_compressed_bm * p)4668a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4669a02d1240SAndreas Gruenbacher {
4670a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4671a02d1240SAndreas Gruenbacher }
4672a02d1240SAndreas Gruenbacher 
dcbp_get_pad_bits(struct p_compressed_bm * p)4673a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4674a02d1240SAndreas Gruenbacher {
4675a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4676a02d1240SAndreas Gruenbacher }
4677a02d1240SAndreas Gruenbacher 
46789b48ff07SLee Jones /*
46792c46407dSAndreas Gruenbacher  * recv_bm_rle_bits
46802c46407dSAndreas Gruenbacher  *
46812c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46822c46407dSAndreas Gruenbacher  * code upon failure.
46832c46407dSAndreas Gruenbacher  */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* window of up to 64 not-yet-decoded bits */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* first bit of the current run */
	unsigned long e;			/* last bit of the current run */
	int toggle = dcbp_get_start(p);	/* nonzero: first run is of set bits */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;	/* bits consumed/produced by the last operation */

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window from the packet payload */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* each iteration decodes one VLI-encoded run length;
	 * runs alternate between clear and set bits */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* reject runs that would write beyond the bitmap */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoder must never claim more bits than we hold */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bitstream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 0: whole bitmap received; 1: caller must fetch another packet */
	return (s != c->bm_bits);
}
4746b411b363SPhilipp Reisner 
47479b48ff07SLee Jones /*
47482c46407dSAndreas Gruenbacher  * decode_bitmap_c
47492c46407dSAndreas Gruenbacher  *
47502c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47512c46407dSAndreas Gruenbacher  * code upon failure.
47522c46407dSAndreas Gruenbacher  */
47532c46407dSAndreas Gruenbacher static int
decode_bitmap_c(struct drbd_peer_device * peer_device,struct p_compressed_bm * p,struct bm_xfer_ctx * c,unsigned int len)475469a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4755b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4756c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4757c6d25cfeSPhilipp Reisner 		unsigned int len)
4758b411b363SPhilipp Reisner {
4759a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
476069a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4761b411b363SPhilipp Reisner 
4762b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4763b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4764b411b363SPhilipp Reisner 	 * during all our tests. */
4765b411b363SPhilipp Reisner 
476669a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
476769a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
47682c46407dSAndreas Gruenbacher 	return -EIO;
4769b411b363SPhilipp Reisner }
4770b411b363SPhilipp Reisner 
INFO_bm_xfer_stats(struct drbd_peer_device * peer_device,const char * direction,struct bm_xfer_ctx * c)47715e54c2a6SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
4772b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4773b411b363SPhilipp Reisner {
4774b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
47755e54c2a6SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(peer_device->connection);
477650d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
477750d0b1adSAndreas Gruenbacher 	unsigned int plain =
477850d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
477950d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
478050d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
478150d0b1adSAndreas Gruenbacher 	unsigned int r;
4782b411b363SPhilipp Reisner 
4783b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4784b411b363SPhilipp Reisner 	if (total == 0)
4785b411b363SPhilipp Reisner 		return;
4786b411b363SPhilipp Reisner 
4787b411b363SPhilipp Reisner 	/* don't report if not compressed */
4788b411b363SPhilipp Reisner 	if (total >= plain)
4789b411b363SPhilipp Reisner 		return;
4790b411b363SPhilipp Reisner 
4791b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4792b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4793b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4794b411b363SPhilipp Reisner 
4795b411b363SPhilipp Reisner 	if (r > 1000)
4796b411b363SPhilipp Reisner 		r = 1000;
4797b411b363SPhilipp Reisner 
4798b411b363SPhilipp Reisner 	r = 1000 - r;
47995e54c2a6SAndreas Gruenbacher 	drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4800b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4801b411b363SPhilipp Reisner 			direction,
4802b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4803b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4804b411b363SPhilipp Reisner 			total, r/10, r % 10);
4805b411b363SPhilipp Reisner }
4806b411b363SPhilipp Reisner 
4807b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4808b411b363SPhilipp Reisner    it does not matter if the process it in 32 bit chunks or 64 bit
4809b411b363SPhilipp Reisner    chunks as long as it is little endian. (Understand it as byte stream,
4810b411b363SPhilipp Reisner    beginning with the lowest byte...) If we would use big endian
4811b411b363SPhilipp Reisner    we would need to process it from the highest address to the lowest,
4812b411b363SPhilipp Reisner    in order to be agnostic to the 32 vs 64 bits issue.
4813b411b363SPhilipp Reisner 
   returns 0 on success, a negative error code otherwise. */
receive_bitmap(struct drbd_connection * connection,struct packet_info * pi)4815bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4816b411b363SPhilipp Reisner {
48179f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4818b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4819b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
48202c46407dSAndreas Gruenbacher 	int err;
48214a76b161SAndreas Gruenbacher 
48229f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
48239f4fe9adSAndreas Gruenbacher 	if (!peer_device)
48244a76b161SAndreas Gruenbacher 		return -EIO;
48259f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4826b411b363SPhilipp Reisner 
4827b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
482820ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
482920ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4830b411b363SPhilipp Reisner 
4831b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4832b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4833b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4834b411b363SPhilipp Reisner 	};
4835b411b363SPhilipp Reisner 
48362c46407dSAndreas Gruenbacher 	for(;;) {
4837e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
483869a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4839e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4840b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4841b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4842e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4843b411b363SPhilipp Reisner 
4844bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4845d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
484682bc0194SAndreas Gruenbacher 				err = -EIO;
4847b411b363SPhilipp Reisner 				goto out;
4848b411b363SPhilipp Reisner 			}
4849e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4850d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
485182bc0194SAndreas Gruenbacher 				err = -EIO;
485278fcbdaeSAndreas Gruenbacher 				goto out;
4853b411b363SPhilipp Reisner 			}
48549f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4855e658983aSAndreas Gruenbacher 			if (err)
4856e658983aSAndreas Gruenbacher 			       goto out;
485769a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4858b411b363SPhilipp Reisner 		} else {
4859d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
486082bc0194SAndreas Gruenbacher 			err = -EIO;
4861b411b363SPhilipp Reisner 			goto out;
4862b411b363SPhilipp Reisner 		}
4863b411b363SPhilipp Reisner 
4864e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4865bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4866b411b363SPhilipp Reisner 
48672c46407dSAndreas Gruenbacher 		if (err <= 0) {
48682c46407dSAndreas Gruenbacher 			if (err < 0)
48692c46407dSAndreas Gruenbacher 				goto out;
4870b411b363SPhilipp Reisner 			break;
48712c46407dSAndreas Gruenbacher 		}
48729f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
487382bc0194SAndreas Gruenbacher 		if (err)
4874b411b363SPhilipp Reisner 			goto out;
48752c46407dSAndreas Gruenbacher 	}
4876b411b363SPhilipp Reisner 
48775e54c2a6SAndreas Gruenbacher 	INFO_bm_xfer_stats(peer_device, "receive", &c);
4878b411b363SPhilipp Reisner 
4879b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4880de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4881de1f8e4aSAndreas Gruenbacher 
48828164dd6cSAndreas Gruenbacher 		err = drbd_send_bitmap(device, peer_device);
488382bc0194SAndreas Gruenbacher 		if (err)
4884b411b363SPhilipp Reisner 			goto out;
4885b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4886b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
48870b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4888b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4889b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4890b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4891d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4892b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4893b411b363SPhilipp Reisner 	}
489482bc0194SAndreas Gruenbacher 	err = 0;
4895b411b363SPhilipp Reisner 
4896b411b363SPhilipp Reisner  out:
4897b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4898b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4899b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
490082bc0194SAndreas Gruenbacher 	return err;
4901b411b363SPhilipp Reisner }
4902b411b363SPhilipp Reisner 
receive_skip(struct drbd_connection * connection,struct packet_info * pi)4903bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4904b411b363SPhilipp Reisner {
49051ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4906e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4907b411b363SPhilipp Reisner 
4908bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4909b411b363SPhilipp Reisner }
4910b411b363SPhilipp Reisner 
receive_UnplugRemote(struct drbd_connection * connection,struct packet_info * pi)4911bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4912b411b363SPhilipp Reisner {
4913b411b363SPhilipp Reisner 	/* Make sure we've acked all the TCP data associated
4914b411b363SPhilipp Reisner 	 * with the data requests being unplugged */
4915ddd061b8SChristoph Hellwig 	tcp_sock_set_quickack(connection->data.socket->sk, 2);
491682bc0194SAndreas Gruenbacher 	return 0;
4917b411b363SPhilipp Reisner }
4918b411b363SPhilipp Reisner 
receive_out_of_sync(struct drbd_connection * connection,struct packet_info * pi)4919bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
492073a01a18SPhilipp Reisner {
49219f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4922b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4923e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49244a76b161SAndreas Gruenbacher 
49259f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49269f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49274a76b161SAndreas Gruenbacher 		return -EIO;
49289f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
492973a01a18SPhilipp Reisner 
4930b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4931f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4932f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4933f735e363SLars Ellenberg 	case C_BEHIND:
4934f735e363SLars Ellenberg 			break;
4935f735e363SLars Ellenberg 	default:
4936d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4937b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4938f735e363SLars Ellenberg 	}
4939f735e363SLars Ellenberg 
49400d11f3cfSChristoph Böhmwalder 	drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
494173a01a18SPhilipp Reisner 
494282bc0194SAndreas Gruenbacher 	return 0;
494373a01a18SPhilipp Reisner }
494473a01a18SPhilipp Reisner 
/*
 * Peer told us a resync block is deallocated (thinly provisioned): submit a
 * local discard for it instead of requesting the data.  Without a local disk,
 * or if submission fails, just ack negatively and account the block as done.
 */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(peer_device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;

		/* ID_SYNCER: this request belongs to resync, not to application I/O */
		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->opf = REQ_OP_DISCARD;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(peer_req);

		if (err) {
			/* submission failed: undo the queueing and fall back
			 * to the no-disk path below (note: goto jumps into
			 * the else branch) */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
5010700ca8c0SPhilipp Reisner 
/* Dispatch entry for one packet type received on the data socket. */
struct data_cmd {
	int expect_payload;	/* nonzero: payload beyond pkt_size is legal */
	unsigned int pkt_size;	/* size of the fixed sub-header to read first */
	int (*fn)(struct drbd_connection *, struct packet_info *); /* handler */
};
5016b411b363SPhilipp Reisner 
/*
 * Data-socket packet dispatch table, indexed by packet type.
 * See struct data_cmd for the meaning of the first two fields.
 */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
};
504702918be2SPhilipp Reisner 
drbdd(struct drbd_connection * connection)5048bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5049b411b363SPhilipp Reisner {
505077351055SPhilipp Reisner 	struct packet_info pi;
505102918be2SPhilipp Reisner 	size_t shs; /* sub header size */
505282bc0194SAndreas Gruenbacher 	int err;
5053b411b363SPhilipp Reisner 
5054bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
50559104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5056deebe195SAndreas Gruenbacher 
5057bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5058c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5059c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
506002918be2SPhilipp Reisner 			goto err_out;
506102918be2SPhilipp Reisner 
5062deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
50634a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
50641ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
50652fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
506602918be2SPhilipp Reisner 			goto err_out;
50670b33a916SLars Ellenberg 		}
5068b411b363SPhilipp Reisner 
5069e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
50709104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
50719104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5072e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
50731ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
50742fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5075c13f7e1aSLars Ellenberg 			goto err_out;
5076c13f7e1aSLars Ellenberg 		}
50779104d31aSLars Ellenberg 		if (pi.size < shs) {
50789104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
50799104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
50809104d31aSLars Ellenberg 			goto err_out;
50819104d31aSLars Ellenberg 		}
5082c13f7e1aSLars Ellenberg 
5083c13f7e1aSLars Ellenberg 		if (shs) {
5084944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5085bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5086a5c31904SAndreas Gruenbacher 			if (err)
508702918be2SPhilipp Reisner 				goto err_out;
5088e2857216SAndreas Gruenbacher 			pi.size -= shs;
5089b411b363SPhilipp Reisner 		}
509002918be2SPhilipp Reisner 
5091944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5092bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
50934a76b161SAndreas Gruenbacher 		if (err) {
50941ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
50959f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
509602918be2SPhilipp Reisner 			goto err_out;
509702918be2SPhilipp Reisner 		}
509802918be2SPhilipp Reisner 	}
509982bc0194SAndreas Gruenbacher 	return;
510002918be2SPhilipp Reisner 
510102918be2SPhilipp Reisner     err_out:
5102bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5103b411b363SPhilipp Reisner }
5104b411b363SPhilipp Reisner 
/*
 * Tear down a lost connection: stop the ack receiver, close the sockets,
 * run per-volume cleanup, possibly fence the peer, and finally move the
 * connection state towards C_UNCONNECTED / C_STANDALONE.
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* per-volume cleanup; drop the RCU lock around the (sleeping)
	 * drbd_disconnected() call, pinning the device with a kref instead */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* fencing: a Primary with an unknown peer disk may need to outdate it */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	/* admin asked for full disconnect: go all the way to standalone */
	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5161360cc740SPhilipp Reisner 
drbd_disconnected(struct drbd_peer_device * peer_device)516269a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *peer_device)
5163360cc740SPhilipp Reisner {
516469a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
5165360cc740SPhilipp Reisner 	unsigned int i;
5166b411b363SPhilipp Reisner 
516785719573SPhilipp Reisner 	/* wait for current activity to cease. */
51680500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
5169b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->active_ee);
5170b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->sync_ee);
5171b30ab791SAndreas Gruenbacher 	_drbd_wait_ee_list_empty(device, &device->read_ee);
51720500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
5173b411b363SPhilipp Reisner 
5174b411b363SPhilipp Reisner 	/* We do not have data structures that would allow us to
5175b411b363SPhilipp Reisner 	 * get the rs_pending_cnt down to 0 again.
5176b411b363SPhilipp Reisner 	 *  * On C_SYNC_TARGET we do not have any data structures describing
5177b411b363SPhilipp Reisner 	 *    the pending RSDataRequest's we have sent.
5178b411b363SPhilipp Reisner 	 *  * On C_SYNC_SOURCE there is no data structure that tracks
5179b411b363SPhilipp Reisner 	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
5180b411b363SPhilipp Reisner 	 *  And no, it is not the sum of the reference counts in the
5181b411b363SPhilipp Reisner 	 *  resync_LRU. The resync_LRU tracks the whole operation including
5182b411b363SPhilipp Reisner 	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
5183b411b363SPhilipp Reisner 	 *  on the fly. */
5184b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5185b30ab791SAndreas Gruenbacher 	device->rs_total = 0;
5186b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
5187b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_pending_cnt, 0);
5188b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
5189b411b363SPhilipp Reisner 
5190b30ab791SAndreas Gruenbacher 	del_timer_sync(&device->resync_timer);
51912bccef39SKees Cook 	resync_timer_fn(&device->resync_timer);
5192b411b363SPhilipp Reisner 
5193b411b363SPhilipp Reisner 	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
5194b411b363SPhilipp Reisner 	 * w_make_resync_request etc. which may still be on the worker queue
5195b411b363SPhilipp Reisner 	 * to be "canceled" */
5196b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5197b411b363SPhilipp Reisner 
5198b30ab791SAndreas Gruenbacher 	drbd_finish_peer_reqs(device);
5199b411b363SPhilipp Reisner 
5200d10b4ea3SPhilipp Reisner 	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
5201d10b4ea3SPhilipp Reisner 	   might have issued a work again. The one before drbd_finish_peer_reqs() is
5202d10b4ea3SPhilipp Reisner 	   necessary to reclain net_ee in drbd_finish_peer_reqs(). */
5203b5043c5eSAndreas Gruenbacher 	drbd_flush_workqueue(&peer_device->connection->sender_work);
5204d10b4ea3SPhilipp Reisner 
520508332d73SLars Ellenberg 	/* need to do it again, drbd_finish_peer_reqs() may have populated it
520608332d73SLars Ellenberg 	 * again via drbd_try_clear_on_disk_bm(). */
5207b30ab791SAndreas Gruenbacher 	drbd_rs_cancel_all(device);
5208b411b363SPhilipp Reisner 
5209b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
5210b30ab791SAndreas Gruenbacher 	device->p_uuid = NULL;
5211b411b363SPhilipp Reisner 
5212b30ab791SAndreas Gruenbacher 	if (!drbd_suspended(device))
521369a22773SAndreas Gruenbacher 		tl_clear(peer_device->connection);
5214b411b363SPhilipp Reisner 
5215b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
5216b411b363SPhilipp Reisner 
5217be115b69SLars Ellenberg 	if (get_ldev(device)) {
5218be115b69SLars Ellenberg 		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
52198164dd6cSAndreas Gruenbacher 				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
5220be115b69SLars Ellenberg 		put_ldev(device);
5221be115b69SLars Ellenberg 	}
522220ceb2b2SLars Ellenberg 
5223b411b363SPhilipp Reisner 	/* tcp_close and release of sendpage pages can be deferred.  I don't
5224b411b363SPhilipp Reisner 	 * want to use SO_LINGER, because apparently it can be deferred for
5225b411b363SPhilipp Reisner 	 * more than 20 seconds (longest time I checked).
5226b411b363SPhilipp Reisner 	 *
5227b411b363SPhilipp Reisner 	 * Actually we don't care for exactly when the network stack does its
5228b411b363SPhilipp Reisner 	 * put_page(), but release our reference on these pages right here.
5229b411b363SPhilipp Reisner 	 */
5230b30ab791SAndreas Gruenbacher 	i = drbd_free_peer_reqs(device, &device->net_ee);
5231b411b363SPhilipp Reisner 	if (i)
5232d0180171SAndreas Gruenbacher 		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
5233b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use_by_net);
5234435f0740SLars Ellenberg 	if (i)
5235d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
5236b30ab791SAndreas Gruenbacher 	i = atomic_read(&device->pp_in_use);
5237b411b363SPhilipp Reisner 	if (i)
5238d0180171SAndreas Gruenbacher 		drbd_info(device, "pp_in_use = %d, expected 0\n", i);
5239b411b363SPhilipp Reisner 
52400b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->read_ee));
52410b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->active_ee));
52420b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->sync_ee));
52430b0ba1efSAndreas Gruenbacher 	D_ASSERT(device, list_empty(&device->done_ee));
5244b411b363SPhilipp Reisner 
5245360cc740SPhilipp Reisner 	return 0;
5246b411b363SPhilipp Reisner }
5247b411b363SPhilipp Reisner 
5248b411b363SPhilipp Reisner /*
5249b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5250b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5251b411b363SPhilipp Reisner  *
5252b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5253b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5254b411b363SPhilipp Reisner  *
5255b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5256b411b363SPhilipp Reisner  */
drbd_send_features(struct drbd_connection * connection)5257bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5258b411b363SPhilipp Reisner {
52599f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
52609f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5261b411b363SPhilipp Reisner 
5262bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5263bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
52649f5bdc33SAndreas Gruenbacher 	if (!p)
5265e8d17b01SAndreas Gruenbacher 		return -EIO;
5266b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5267b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5268b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
526920c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5270bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5271b411b363SPhilipp Reisner }
5272b411b363SPhilipp Reisner 
5273b411b363SPhilipp Reisner /*
5274b411b363SPhilipp Reisner  * return values:
5275b411b363SPhilipp Reisner  *   1 yes, we have a valid connection
5276b411b363SPhilipp Reisner  *   0 oops, did not work out, please try again
5277b411b363SPhilipp Reisner  *  -1 peer talks different language,
5278b411b363SPhilipp Reisner  *     no point in trying again, please go standalone.
5279b411b363SPhilipp Reisner  */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our own feature packet first; a send failure is treated as a
	 * transient network problem (return 0 -> caller retries). */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	/* Anything but a ConnectionFeatures packet at this point means the
	 * peer speaks a different dialect: give up (-1), do not retry. */
	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert in place from network byte order. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* Require a non-empty intersection of the two version ranges. */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	/* Only features both sides announced are enabled. */
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5345b411b363SPhilipp Reisner 
5346b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Built without HMAC support: we cannot verify a shared secret, so
	 * refuse permanently (-1 means "do not retry, go standalone"). */
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5353b411b363SPhilipp Reisner #else
5354b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5355b10d96cbSJohannes Thoma 
5356b10d96cbSJohannes Thoma /* Return value:
5357b10d96cbSJohannes Thoma 	1 - auth succeeded,
5358b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5359b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5360b10d96cbSJohannes Thoma */
5361b10d96cbSJohannes Thoma 
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may be replaced
	 * concurrently by a configuration change. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* shash_desc is followed by tfm-specific context, hence the
	 * variable-size allocation. */
	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Step 1: send our random challenge. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;	/* network trouble: caller may retry */
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Step 2: receive the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;	/* protocol violation: do not retry */
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (!peers_ch) {
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer echoing our own challenge could trick us into signing our
	 * own bytes (reflection attack): refuse. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Step 3: HMAC the peer's challenge and send the response back. */
	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (!response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Step 4: receive the peer's response to OUR challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Step 5: compute the expected HMAC over our challenge and compare. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (!right_response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;	/* digests differ: authentication failed for good */

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	if (desc) {
		/* Wipe the keyed hash state before freeing it. */
		shash_desc_zero(desc);
		kfree(desc);
	}

	return rv;
}
5537b411b363SPhilipp Reisner #endif
5538b411b363SPhilipp Reisner 
drbd_receiver(struct drbd_thread * thi)55398fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5540b411b363SPhilipp Reisner {
5541bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5542b411b363SPhilipp Reisner 	int h;
5543b411b363SPhilipp Reisner 
55441ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5545b411b363SPhilipp Reisner 
5546b411b363SPhilipp Reisner 	do {
5547bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5548b411b363SPhilipp Reisner 		if (h == 0) {
5549bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
555020ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5551b411b363SPhilipp Reisner 		}
5552b411b363SPhilipp Reisner 		if (h == -1) {
55531ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5554bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5555b411b363SPhilipp Reisner 		}
5556b411b363SPhilipp Reisner 	} while (h == 0);
5557b411b363SPhilipp Reisner 
5558c51a0ef3SLars Ellenberg 	if (h > 0) {
5559c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5560bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5561c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5562c51a0ef3SLars Ellenberg 	}
5563b411b363SPhilipp Reisner 
5564bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5565b411b363SPhilipp Reisner 
55661ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5567b411b363SPhilipp Reisner 	return 0;
5568b411b363SPhilipp Reisner }
5569b411b363SPhilipp Reisner 
5570b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5571b411b363SPhilipp Reisner 
got_conn_RqSReply(struct drbd_connection * connection,struct packet_info * pi)5572bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5573b411b363SPhilipp Reisner {
5574e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5575b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5576b411b363SPhilipp Reisner 
5577b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5578bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5579b411b363SPhilipp Reisner 	} else {
5580bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
55811ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5582fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5583fc3b10a4SPhilipp Reisner 	}
5584bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5585e4f78edeSPhilipp Reisner 
55862735a594SAndreas Gruenbacher 	return 0;
5587fc3b10a4SPhilipp Reisner }
5588e4f78edeSPhilipp Reisner 
got_RqSReply(struct drbd_connection * connection,struct packet_info * pi)5589bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5590e4f78edeSPhilipp Reisner {
55919f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5592b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5593e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5594e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5595e4f78edeSPhilipp Reisner 
55969f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
55979f4fe9adSAndreas Gruenbacher 	if (!peer_device)
55982735a594SAndreas Gruenbacher 		return -EIO;
55999f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56001952e916SAndreas Gruenbacher 
5601bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
56020b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5603bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
56044d0fc3fdSPhilipp Reisner 	}
56054d0fc3fdSPhilipp Reisner 
5606e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5607b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5608e4f78edeSPhilipp Reisner 	} else {
5609b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5610d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5611b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5612b411b363SPhilipp Reisner 	}
5613b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5614b411b363SPhilipp Reisner 
56152735a594SAndreas Gruenbacher 	return 0;
5616b411b363SPhilipp Reisner }
5617b411b363SPhilipp Reisner 
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	/* A ping merely probes liveness; answer immediately. */
	return drbd_send_ping_ack(connection);
}
5623b411b363SPhilipp Reisner 
got_PingAck(struct drbd_connection * connection,struct packet_info * pi)5624bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5625b411b363SPhilipp Reisner {
5626b411b363SPhilipp Reisner 	/* restore idle timeout */
5627bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5628bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5629bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5630b411b363SPhilipp Reisner 
56312735a594SAndreas Gruenbacher 	return 0;
5632b411b363SPhilipp Reisner }
5633b411b363SPhilipp Reisner 
got_IsInSync(struct drbd_connection * connection,struct packet_info * pi)5634bde89a9eSAndreas Gruenbacher static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
5635b411b363SPhilipp Reisner {
56369f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5637b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5638e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5639b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5640b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5641b411b363SPhilipp Reisner 
56429f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56439f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56442735a594SAndreas Gruenbacher 		return -EIO;
56459f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56461952e916SAndreas Gruenbacher 
56479f4fe9adSAndreas Gruenbacher 	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
5648b411b363SPhilipp Reisner 
564969a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5650b411b363SPhilipp Reisner 
5651b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
5652b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
56530d11f3cfSChristoph Böhmwalder 		drbd_set_in_sync(peer_device, sector, blksize);
5654b411b363SPhilipp Reisner 		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
5655b30ab791SAndreas Gruenbacher 		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5656b30ab791SAndreas Gruenbacher 		put_ldev(device);
56571d53f09eSLars Ellenberg 	}
56580d11f3cfSChristoph Böhmwalder 	dec_rs_pending(peer_device);
5659b30ab791SAndreas Gruenbacher 	atomic_add(blksize >> 9, &device->rs_sect_in);
5660b411b363SPhilipp Reisner 
56612735a594SAndreas Gruenbacher 	return 0;
5662b411b363SPhilipp Reisner }
5663b411b363SPhilipp Reisner 
/* Look up the request identified by (id, sector) in @root and apply the
 * state transition @what to it, all under the resource's req_lock.
 * Completion of the master bio, if triggered, happens after the lock is
 * dropped.  Returns 0 on success, -EIO if the request was not found and
 * @missing_ok is false. */
static int
validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, peer_device, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* complete_master_bio() may sleep/submit; must not hold req_lock. */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5686b411b363SPhilipp Reisner 
got_BlockAck(struct drbd_connection * connection,struct packet_info * pi)5687bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
5688b411b363SPhilipp Reisner {
56899f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5690b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5691e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5692b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5693b411b363SPhilipp Reisner 	int blksize = be32_to_cpu(p->blksize);
5694b411b363SPhilipp Reisner 	enum drbd_req_event what;
5695b411b363SPhilipp Reisner 
56969f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56979f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56982735a594SAndreas Gruenbacher 		return -EIO;
56999f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
57001952e916SAndreas Gruenbacher 
570169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5702b411b363SPhilipp Reisner 
5703579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
57040d11f3cfSChristoph Böhmwalder 		drbd_set_in_sync(peer_device, sector, blksize);
57050d11f3cfSChristoph Böhmwalder 		dec_rs_pending(peer_device);
57062735a594SAndreas Gruenbacher 		return 0;
5707b411b363SPhilipp Reisner 	}
5708e05e1e59SAndreas Gruenbacher 	switch (pi->cmd) {
5709b411b363SPhilipp Reisner 	case P_RS_WRITE_ACK:
57108554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER_AND_SIS;
5711b411b363SPhilipp Reisner 		break;
5712b411b363SPhilipp Reisner 	case P_WRITE_ACK:
57138554df1cSAndreas Gruenbacher 		what = WRITE_ACKED_BY_PEER;
5714b411b363SPhilipp Reisner 		break;
5715b411b363SPhilipp Reisner 	case P_RECV_ACK:
57168554df1cSAndreas Gruenbacher 		what = RECV_ACKED_BY_PEER;
5717b411b363SPhilipp Reisner 		break;
5718d4dabbe2SLars Ellenberg 	case P_SUPERSEDED:
5719d4dabbe2SLars Ellenberg 		what = CONFLICT_RESOLVED;
57207be8da07SAndreas Gruenbacher 		break;
57217be8da07SAndreas Gruenbacher 	case P_RETRY_WRITE:
57227be8da07SAndreas Gruenbacher 		what = POSTPONE_WRITE;
5723b411b363SPhilipp Reisner 		break;
5724b411b363SPhilipp Reisner 	default:
57252735a594SAndreas Gruenbacher 		BUG();
5726b411b363SPhilipp Reisner 	}
5727b411b363SPhilipp Reisner 
5728ad878a0dSChristoph Böhmwalder 	return validate_req_change_req_state(peer_device, p->block_id, sector,
5729b30ab791SAndreas Gruenbacher 					     &device->write_requests, __func__,
5730bc9c5c41SAndreas Gruenbacher 					     what, false);
5731b411b363SPhilipp Reisner }
5732b411b363SPhilipp Reisner 
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Negative ack for a resync request: count it as failed resync I/O. */
	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(peer_device);
		drbd_rs_failed_io(peer_device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(peer_device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(peer_device, sector, size);
	}
	return 0;
}
5768b411b363SPhilipp Reisner 
got_NegDReply(struct drbd_connection * connection,struct packet_info * pi)5769bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5770b411b363SPhilipp Reisner {
57719f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5772b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5773e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5774b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5775b411b363SPhilipp Reisner 
57769f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57779f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57782735a594SAndreas Gruenbacher 		return -EIO;
57799f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
57801952e916SAndreas Gruenbacher 
578169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
57827be8da07SAndreas Gruenbacher 
5783d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5784b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5785b411b363SPhilipp Reisner 
5786ad878a0dSChristoph Böhmwalder 	return validate_req_change_req_state(peer_device, p->block_id, sector,
5787b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
57888554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5789b411b363SPhilipp Reisner }
5790b411b363SPhilipp Reisner 
got_NegRSDReply(struct drbd_connection * connection,struct packet_info * pi)5791bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
5792b411b363SPhilipp Reisner {
57939f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5794b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5795b411b363SPhilipp Reisner 	sector_t sector;
5796b411b363SPhilipp Reisner 	int size;
5797e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
57981952e916SAndreas Gruenbacher 
57999f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58009f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58012735a594SAndreas Gruenbacher 		return -EIO;
58029f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5803b411b363SPhilipp Reisner 
5804b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5805b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5806b411b363SPhilipp Reisner 
580769a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5808b411b363SPhilipp Reisner 
58090d11f3cfSChristoph Böhmwalder 	dec_rs_pending(peer_device);
5810b411b363SPhilipp Reisner 
5811b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
5812b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5813e05e1e59SAndreas Gruenbacher 		switch (pi->cmd) {
5814d612d309SPhilipp Reisner 		case P_NEG_RS_DREPLY:
58150d11f3cfSChristoph Böhmwalder 			drbd_rs_failed_io(peer_device, sector, size);
58166327c911SGustavo A. R. Silva 			break;
5817d612d309SPhilipp Reisner 		case P_RS_CANCEL:
5818d612d309SPhilipp Reisner 			break;
5819d612d309SPhilipp Reisner 		default:
58202735a594SAndreas Gruenbacher 			BUG();
5821d612d309SPhilipp Reisner 		}
5822b30ab791SAndreas Gruenbacher 		put_ldev(device);
5823b411b363SPhilipp Reisner 	}
5824b411b363SPhilipp Reisner 
58252735a594SAndreas Gruenbacher 	return 0;
5826b411b363SPhilipp Reisner }
5827b411b363SPhilipp Reisner 
got_BarrierAck(struct drbd_connection * connection,struct packet_info * pi)5828bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
5829b411b363SPhilipp Reisner {
5830e658983aSAndreas Gruenbacher 	struct p_barrier_ack *p = pi->data;
5831c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
58329ed57dcbSLars Ellenberg 	int vnr;
5833b411b363SPhilipp Reisner 
5834bde89a9eSAndreas Gruenbacher 	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
5835b411b363SPhilipp Reisner 
58369ed57dcbSLars Ellenberg 	rcu_read_lock();
5837c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5838c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
5839c06ece6bSAndreas Gruenbacher 
5840b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_AHEAD &&
5841b30ab791SAndreas Gruenbacher 		    atomic_read(&device->ap_in_flight) == 0 &&
5842b30ab791SAndreas Gruenbacher 		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5843b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ;
5844b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
5845c4752ef1SPhilipp Reisner 		}
58469ed57dcbSLars Ellenberg 	}
58479ed57dcbSLars Ellenberg 	rcu_read_unlock();
5848c4752ef1SPhilipp Reisner 
58492735a594SAndreas Gruenbacher 	return 0;
5850b411b363SPhilipp Reisner }
5851b411b363SPhilipp Reisner 
/*
 * Handle a P_OV_RESULT packet: the peer's answer for one online-verify
 * request.  block_id encodes whether the block differed (ID_OUT_OF_SYNC).
 * Tracks verify progress and, once the last reply arrived, queues
 * w_ov_finished on the sender work queue.
 */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(peer_device, sector, size);
	else
		/* block was in sync: flush any accumulated out-of-sync range */
		ov_out_of_sync_print(peer_device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(peer_device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(peer_device, device->ov_left);

	if (device->ov_left == 0) {
		/* last reply: finish the verify from worker context */
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* allocation failed: finish synchronously as a fallback */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(peer_device);
			drbd_resync_finished(peer_device);
		}
	}
	put_ldev(device);
	return 0;
}
5903b411b363SPhilipp Reisner 
/* Consume and ignore a packet we do not act on (e.g. P_DELAY_PROBE). */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
59080ced55a3SPhilipp Reisner 
/* Dispatch-table entry for packets arriving on the meta socket:
 * expected payload size (without header) and the handler to call. */
struct meta_sock_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5913b411b363SPhilipp Reisner 
/*
 * Set the receive timeout of the meta socket from the net config:
 * the (short) ping timeout while a PingAck is outstanding, otherwise
 * the (long) ping interval.  ping_timeo is configured in 1/10 seconds.
 */
static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
	struct net_conf *nc;
	long timeout;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (ping_timeout ? nc->ping_timeo : nc->ping_int) * HZ;
	rcu_read_unlock();

	if (ping_timeout)
		timeout /= 10;

	connection->meta.socket->sk->sk_rcvtimeo = timeout;
}
5930668700b4SPhilipp Reisner 
set_ping_timeout(struct drbd_connection * connection)5931668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5932668700b4SPhilipp Reisner {
5933668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 1);
5934668700b4SPhilipp Reisner }
5935668700b4SPhilipp Reisner 
set_idle_timeout(struct drbd_connection * connection)5936668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5937668700b4SPhilipp Reisner {
5938668700b4SPhilipp Reisner 	set_rcvtimeo(connection, 0);
5939668700b4SPhilipp Reisner }
5940668700b4SPhilipp Reisner 
/*
 * Handlers for packets received on the meta socket, indexed by packet
 * command.  A pkt_size of 0 means a header-only packet (ping / ping ack).
 * Entries with a NULL fn are rejected in drbd_ack_receiver().
 */
static struct meta_sock_cmd ack_receiver_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
5960b411b363SPhilipp Reisner 
/*
 * drbd_ack_receiver() - main loop of the ack receiver thread
 * @thi: the drbd_thread this function runs as
 *
 * Receives packets on the meta socket and dispatches them via
 * ack_receiver_tbl (pings, block acks/nacks, barrier acks, state change
 * replies, ...).  Also sends P_PING when SEND_PING is set and enforces
 * the ping timeout.  On protocol errors it forces the connection into
 * C_NETWORK_FAILURE (reconnect) or C_DISCONNECTING (disconnect).
 *
 * Returns 0 when the thread terminates.
 */
int drbd_ack_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct meta_sock_cmd *cmd = NULL;
	struct packet_info pi;
	unsigned long pre_recv_jif;
	int rv;
	void *buf    = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect   = header_size;
	bool ping_timeout_active = false;

	/* run with low realtime priority so acks are not starved */
	sched_set_fifo_low(current);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		conn_reclaim_net_peer_reqs(connection);

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			/* expect the PingAck within the (shorter) ping timeout */
			set_ping_timeout(connection);
			ping_timeout_active = true;
		}

		pre_recv_jif = jiffies;
		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS  (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				/* We initiated the teardown ourselves: give the
				 * state machine (bounded by the ping timeout) a
				 * chance to leave C_WF_REPORT_PARAMS before
				 * treating the shutdown as an error. */
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received, pre_recv_jif))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			/* idle timeout expired: probe the peer with a ping */
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			/* maybe drbd_thread_stop(): the while condition will notice.
			 * maybe woken for send_ping: we'll send a ping above,
			 * and change the rcvtimeo */
			flush_signals(current);
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			/* full header received: decode and look up the handler */
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			cmd = &ack_receiver_tbl[pi.cmd];
			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%ps failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
				/* PingAck arrived: fall back to the idle timeout */
				set_idle_timeout(connection);
				ping_timeout_active = false;
			}

			/* reset the receive state for the next packet */
			buf	 = connection->meta.rbuf;
			received = 0;
			expect	 = header_size;
			cmd	 = NULL;
		}
	}

	/* the if (0) wrappers make the labels reachable only via goto */
	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	drbd_info(connection, "ack_receiver terminated\n");

	return 0;
}
6096668700b4SPhilipp Reisner 
/*
 * Work callback: flush all pending peer-request acks for one device.
 *
 * When the connection is configured with tcp_cork, the meta socket is
 * corked around the burst so the acks go out in as few segments as
 * possible.  Drops the device reference taken in
 * drbd_endio_write_sec_final().
 */
void drbd_send_acks_wf(struct work_struct *ws)
{
	struct drbd_peer_device *peer_device =
		container_of(ws, struct drbd_peer_device, send_acks_work);
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_device *device = peer_device->device;
	struct net_conf *nc;
	int tcp_cork, err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	tcp_cork = nc->tcp_cork;
	rcu_read_unlock();

	if (tcp_cork)
		tcp_sock_set_cork(connection->meta.socket->sk, true);

	err = drbd_finish_peer_reqs(device);
	kref_put(&device->kref, drbd_destroy_device);
	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
	   struct work_struct send_acks_work alive, which is in the peer_device object */

	if (err) {
		/* note: socket left corked; the connection is torn down anyway */
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		return;
	}

	if (tcp_cork)
		tcp_sock_set_cork(connection->meta.socket->sk, false);

	return;
}
6129