1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner    drbd_receiver.c
4b411b363SPhilipp Reisner 
5b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner 
7b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner 
11b411b363SPhilipp Reisner  */
12b411b363SPhilipp Reisner 
13b411b363SPhilipp Reisner 
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner 
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner 
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__
31b411b363SPhilipp Reisner #include <linux/unistd.h>
32b411b363SPhilipp Reisner #include <linux/vmalloc.h>
33b411b363SPhilipp Reisner #include <linux/random.h>
34b411b363SPhilipp Reisner #include <linux/string.h>
35b411b363SPhilipp Reisner #include <linux/scatterlist.h>
36b411b363SPhilipp Reisner #include "drbd_int.h"
37a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
38b411b363SPhilipp Reisner #include "drbd_req.h"
39b411b363SPhilipp Reisner #include "drbd_vli.h"
40b411b363SPhilipp Reisner 
41f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4220c68fdeSLars Ellenberg 
/* Decoded information about one received packet (filled from its header). */
struct packet_info {
	enum drbd_packet cmd;	/* packet command code */
	unsigned int size;	/* size taken from the packet header */
	unsigned int vnr;	/* volume number */
	void *data;		/* points into the receive buffer */
};
4977351055SPhilipp Reisner 
/* Possible outcomes when trying to finish a write epoch,
 * see drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
55b411b363SPhilipp Reisner 
56bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
57bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
59a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
60bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
62b411b363SPhilipp Reisner 
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
65b411b363SPhilipp Reisner 
6645bb912bSLars Ellenberg /*
6745bb912bSLars Ellenberg  * some helper functions to deal with single linked page lists,
6845bb912bSLars Ellenberg  * page->private being our "next" pointer.
6945bb912bSLars Ellenberg  */
7045bb912bSLars Ellenberg 
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 *
 * The chain is singly linked through page->private (page_chain_next());
 * the detached sub-chain is terminated by clearing page_private() of its
 * last page.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages into the chain.  The loop always exits via the
	 * break (once n pages were seen) or the early return (chain too
	 * short); at the break, tmp is the first page NOT handed out. */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;	/* remainder of the chain (may be NULL) */
	return page;
}
10545bb912bSLars Ellenberg 
10645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10745bb912bSLars Ellenberg  * "private" page chain, before adding it back to a global chain head
10845bb912bSLars Ellenberg  * with page_chain_add() under a spinlock. */
10945bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len)
11045bb912bSLars Ellenberg {
11145bb912bSLars Ellenberg 	struct page *tmp;
11245bb912bSLars Ellenberg 	int i = 1;
11345bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page)))
11445bb912bSLars Ellenberg 		++i, page = tmp;
11545bb912bSLars Ellenberg 	if (len)
11645bb912bSLars Ellenberg 		*len = i;
11745bb912bSLars Ellenberg 	return page;
11845bb912bSLars Ellenberg }
11945bb912bSLars Ellenberg 
12045bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12145bb912bSLars Ellenberg {
12245bb912bSLars Ellenberg 	struct page *tmp;
12345bb912bSLars Ellenberg 	int i = 0;
12445bb912bSLars Ellenberg 	page_chain_for_each_safe(page, tmp) {
12545bb912bSLars Ellenberg 		put_page(page);
12645bb912bSLars Ellenberg 		++i;
12745bb912bSLars Ellenberg 	}
12845bb912bSLars Ellenberg 	return i;
12945bb912bSLars Ellenberg }
13045bb912bSLars Ellenberg 
13145bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13245bb912bSLars Ellenberg 		struct page *chain_first, struct page *chain_last)
13345bb912bSLars Ellenberg {
13445bb912bSLars Ellenberg #if 1
13545bb912bSLars Ellenberg 	struct page *tmp;
13645bb912bSLars Ellenberg 	tmp = page_chain_tail(chain_first, NULL);
13745bb912bSLars Ellenberg 	BUG_ON(tmp != chain_last);
13845bb912bSLars Ellenberg #endif
13945bb912bSLars Ellenberg 
14045bb912bSLars Ellenberg 	/* add chain to head */
14145bb912bSLars Ellenberg 	set_page_private(chain_last, (unsigned long)*head);
14245bb912bSLars Ellenberg 	*head = chain_first;
14345bb912bSLars Ellenberg }
14445bb912bSLars Ellenberg 
/* Allocate a chain of @number pages: first try the module-global
 * drbd_pp_pool, then fall back to alloc_page().
 * Returns the chain, or NULL if not all @number pages were available;
 * a partial fallback allocation is returned to the pool, not leaked. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		/* give the partial chain back to the pool */
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
190b411b363SPhilipp Reisner 
/* Move finished entries from device->net_ee onto @to_be_freed.
 * Caller must hold resource->req_lock. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
207b411b363SPhilipp Reisner 
/* Collect finished net_ee peer requests under req_lock, then free them
 * outside the lock (freeing may sleep, see __drbd_free_peer_req). */
static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
219b411b363SPhilipp Reisner 
/* Reclaim net peer requests for all volumes of @connection.
 * We must drop the RCU read lock around drbd_reclaim_net_peer_reqs()
 * (it may sleep), so a kref pins the device across the unlock/relock. */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* nothing lent to the network stack, nothing to reclaim */
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
239668700b4SPhilipp Reisner 
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* slept the full timeout without being woken: stop
		 * throttling on max-buffers, see comment above */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
310b411b363SPhilipp Reisner 
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pick the accounting counter this chain was charged against */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* keep the global pool bounded; beyond that, free to the system */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* wake up anyone throttled in drbd_alloc_pages() */
	wake_up(&drbd_pp_wait);
}
339b411b363SPhilipp Reisner 
340b411b363SPhilipp Reisner /*
341b411b363SPhilipp Reisner You need to hold the req_lock:
342b411b363SPhilipp Reisner  _drbd_wait_ee_list_empty()
343b411b363SPhilipp Reisner 
344b411b363SPhilipp Reisner You must not have the req_lock:
3453967deb1SAndreas Gruenbacher  drbd_free_peer_req()
3460db55363SAndreas Gruenbacher  drbd_alloc_peer_req()
3477721f567SAndreas Gruenbacher  drbd_free_peer_reqs()
348b411b363SPhilipp Reisner  drbd_ee_fix_bhs()
349a990be46SAndreas Gruenbacher  drbd_finish_peer_reqs()
350b411b363SPhilipp Reisner  drbd_clear_done_ee()
351b411b363SPhilipp Reisner  drbd_wait_ee_list_empty()
352b411b363SPhilipp Reisner */
353b411b363SPhilipp Reisner 
/* normal: payload_size == request size (bi_size)
 * w_same: payload_size == logical_block_size
 * trim: payload_size == 0 */
/* Allocate a peer request plus a page chain sized for @payload_size.
 * Returns NULL on allocation failure, or when fault injection
 * (DRBD_FAULT_AL_EE) triggers. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	/* pages needed to hold payload_size bytes, rounded up */
	unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, &drbd_ee_mempool);
	return NULL;
}
403b411b363SPhilipp Reisner 
/* Free a peer request and its page chain; may sleep.
 * @is_net: account the pages against pp_in_use_by_net instead of
 * pp_in_use (see drbd_free_pages). */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* unexpected: an activity log reference is still held here;
	 * drop it so it is not leaked */
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, &drbd_ee_mempool);
}
419b411b363SPhilipp Reisner 
/* Splice @list off the device (under req_lock) and free every peer
 * request on it.  Returns the number of requests freed. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries were accounted against pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
437b411b363SPhilipp Reisner 
/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
/* Run the completion callback of every peer request on done_ee and free
 * them; also reclaim finished net_ee entries.  Returns 0, or the first
 * callback error encountered. */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* latch only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
473b411b363SPhilipp Reisner 
/* Wait (uninterruptibly) until @head is empty.
 * Caller must hold resource->req_lock; it is dropped and re-acquired
 * around each io_schedule(). */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
489b411b363SPhilipp Reisner 
/* Like _drbd_wait_ee_list_empty(), but takes req_lock itself. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
497b411b363SPhilipp Reisner 
498dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
499b411b363SPhilipp Reisner {
500b411b363SPhilipp Reisner 	struct kvec iov = {
501b411b363SPhilipp Reisner 		.iov_base = buf,
502b411b363SPhilipp Reisner 		.iov_len = size,
503b411b363SPhilipp Reisner 	};
504b411b363SPhilipp Reisner 	struct msghdr msg = {
505b411b363SPhilipp Reisner 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
506b411b363SPhilipp Reisner 	};
507aa563d7bSDavid Howells 	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
508f7765c36SAl Viro 	return sock_recvmsg(sock, &msg, msg.msg_flags);
509b411b363SPhilipp Reisner }
510b411b363SPhilipp Reisner 
/* Receive @size bytes from the data socket.
 * Returns bytes received, 0 on orderly shutdown, or a negative error.
 * Any short/failed receive forces the connection to C_BROKEN_PIPE,
 * except for a shutdown that was expected after DISCONNECT_SENT. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			/* we initiated the disconnect: give the state machine
			 * up to ping_timeo to leave C_WF_REPORT_PARAMS; if it
			 * does, this shutdown is expected, not an error */
			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
543b411b363SPhilipp Reisner 
544bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
545c6967746SAndreas Gruenbacher {
546c6967746SAndreas Gruenbacher 	int err;
547c6967746SAndreas Gruenbacher 
548bde89a9eSAndreas Gruenbacher 	err = drbd_recv(connection, buf, size);
549c6967746SAndreas Gruenbacher 	if (err != size) {
550c6967746SAndreas Gruenbacher 		if (err >= 0)
551c6967746SAndreas Gruenbacher 			err = -EIO;
552c6967746SAndreas Gruenbacher 	} else
553c6967746SAndreas Gruenbacher 		err = 0;
554c6967746SAndreas Gruenbacher 	return err;
555c6967746SAndreas Gruenbacher }
556c6967746SAndreas Gruenbacher 
/* Like drbd_recv_all(), but additionally warn about a short read
 * (unless a pending signal explains the interruption). */
static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}
566a5c31904SAndreas Gruenbacher 
5675dbf1673SLars Ellenberg /* quoting tcp(7):
5685dbf1673SLars Ellenberg  *   On individual connections, the socket buffer size must be set prior to the
5695dbf1673SLars Ellenberg  *   listen(2) or connect(2) calls in order to have it take effect.
5705dbf1673SLars Ellenberg  * This is our wrapper to do so.
5715dbf1673SLars Ellenberg  */
5725dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5735dbf1673SLars Ellenberg 		unsigned int rcv)
5745dbf1673SLars Ellenberg {
5755dbf1673SLars Ellenberg 	/* open coded SO_SNDBUF, SO_RCVBUF */
5765dbf1673SLars Ellenberg 	if (snd) {
5775dbf1673SLars Ellenberg 		sock->sk->sk_sndbuf = snd;
5785dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5795dbf1673SLars Ellenberg 	}
5805dbf1673SLars Ellenberg 	if (rcv) {
5815dbf1673SLars Ellenberg 		sock->sk->sk_rcvbuf = rcv;
5825dbf1673SLars Ellenberg 		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5835dbf1673SLars Ellenberg 	}
5845dbf1673SLars Ellenberg }
5855dbf1673SLars Ellenberg 
/* Actively try to establish one TCP connection to the peer.
 *
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, peer not yet reachable, pending signal) leave the connection
 * state alone so the caller keeps retrying; unexpected errors after the
 * bind step force the connection to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the tunables under RCU; net_conf may be replaced or go
	 * away concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* Work on local copies of the addresses; sockaddr_in6 is large enough
	 * for both address families used here. */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* Source port 0: let the kernel pick a free ephemeral port. */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* NOTE(review): clamps against sizeof(src_in6) rather than
	 * sizeof(peer_in6); harmless since both are sockaddr_in6. */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	/* Both timeouts bound by connect_int; buffer sizes must be set
	 * before connect(2), see drbd_setbufsize(). */
	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		/* Classify the error: transient network conditions keep us in
		 * C_WF_CONNECTION; anything else is logged, and (before the
		 * bind step) forces a disconnect. */
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
673b411b363SPhilipp Reisner 
/* Context shared between prepare_listen_socket()/drbd_wait_for_connect()
 * and the listen socket's sk_state_change callback. */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket */
	struct completion door_bell;	/* completed when a peer connection reaches TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* callback we chain to / restore */

};
6817a426fd8SPhilipp Reisner 
682715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk)
683b411b363SPhilipp Reisner {
6847a426fd8SPhilipp Reisner 	struct accept_wait_data *ad = sk->sk_user_data;
685715306f6SAndreas Gruenbacher 	void (*state_change)(struct sock *sk);
6867a426fd8SPhilipp Reisner 
687715306f6SAndreas Gruenbacher 	state_change = ad->original_sk_state_change;
688715306f6SAndreas Gruenbacher 	if (sk->sk_state == TCP_ESTABLISHED)
6897a426fd8SPhilipp Reisner 		complete(&ad->door_bell);
690715306f6SAndreas Gruenbacher 	state_change(sk);
6917a426fd8SPhilipp Reisner }
6927a426fd8SPhilipp Reisner 
/* Create, bind and listen on the passive-side socket, and hook our
 * sk_state_change callback into it so drbd_wait_for_connect() gets woken
 * on incoming connections.
 *
 * On success, ad->s_listen holds the listening socket and 0 is returned.
 * On failure, -EIO is returned; unexpected errors are logged and force
 * C_DISCONNECTING. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot buffer-size tunables under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* buffer sizes must be set before listen(2), see drbd_setbufsize() */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Install the door-bell callback under sk_callback_lock, remembering
	 * the original one so unregister_state_change() can restore it. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	/* NOTE(review): if listen() fails, ad->s_listen still points at the
	 * released socket; the caller bails out without using it. */
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7551f3e509bSPhilipp Reisner 
/* Undo the callback hookup done in prepare_listen_socket(): restore the
 * original sk_state_change handler and drop the sk_user_data pointer,
 * serialized against the callback via sk_callback_lock. */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
763715306f6SAndreas Gruenbacher 
/* Wait (with jittered timeout) for the door bell rung by
 * drbd_incoming_connection(), then accept the pending connection.
 * Returns the accepted socket, or NULL on timeout/signal/error. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	/* randomly add or subtract 1/7 of the base timeout, so the two
	 * peers don't keep colliding with identical retry cadence */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;	/* timed out or interrupted */

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* stop receiving state-change notifications on the accepted socket */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
800b411b363SPhilipp Reisner 
801bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
802b411b363SPhilipp Reisner 
803bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8049f5bdc33SAndreas Gruenbacher 			     enum drbd_packet cmd)
8059f5bdc33SAndreas Gruenbacher {
806bde89a9eSAndreas Gruenbacher 	if (!conn_prepare_command(connection, sock))
8079f5bdc33SAndreas Gruenbacher 		return -EIO;
808bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
809b411b363SPhilipp Reisner }
810b411b363SPhilipp Reisner 
811bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
812b411b363SPhilipp Reisner {
813bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
8149f5bdc33SAndreas Gruenbacher 	struct packet_info pi;
8154920e37aSPhilipp Reisner 	struct net_conf *nc;
8169f5bdc33SAndreas Gruenbacher 	int err;
817b411b363SPhilipp Reisner 
8184920e37aSPhilipp Reisner 	rcu_read_lock();
8194920e37aSPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8204920e37aSPhilipp Reisner 	if (!nc) {
8214920e37aSPhilipp Reisner 		rcu_read_unlock();
8224920e37aSPhilipp Reisner 		return -EIO;
8234920e37aSPhilipp Reisner 	}
8244920e37aSPhilipp Reisner 	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
8254920e37aSPhilipp Reisner 	rcu_read_unlock();
8264920e37aSPhilipp Reisner 
827bde89a9eSAndreas Gruenbacher 	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
8289f5bdc33SAndreas Gruenbacher 	if (err != header_size) {
8299f5bdc33SAndreas Gruenbacher 		if (err >= 0)
8309f5bdc33SAndreas Gruenbacher 			err = -EIO;
8319f5bdc33SAndreas Gruenbacher 		return err;
8329f5bdc33SAndreas Gruenbacher 	}
833bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, connection->data.rbuf, &pi);
8349f5bdc33SAndreas Gruenbacher 	if (err)
8359f5bdc33SAndreas Gruenbacher 		return err;
8369f5bdc33SAndreas Gruenbacher 	return pi.cmd;
837b411b363SPhilipp Reisner }
838b411b363SPhilipp Reisner 
839b411b363SPhilipp Reisner /**
840b411b363SPhilipp Reisner  * drbd_socket_okay() - Free the socket if its connection is not okay
841b411b363SPhilipp Reisner  * @sock:	pointer to the pointer to the socket.
842b411b363SPhilipp Reisner  */
8435d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
844b411b363SPhilipp Reisner {
845b411b363SPhilipp Reisner 	int rr;
846b411b363SPhilipp Reisner 	char tb[4];
847b411b363SPhilipp Reisner 
848b411b363SPhilipp Reisner 	if (!*sock)
84981e84650SAndreas Gruenbacher 		return false;
850b411b363SPhilipp Reisner 
851dbd9eea0SPhilipp Reisner 	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
852b411b363SPhilipp Reisner 
853b411b363SPhilipp Reisner 	if (rr > 0 || rr == -EAGAIN) {
85481e84650SAndreas Gruenbacher 		return true;
855b411b363SPhilipp Reisner 	} else {
856b411b363SPhilipp Reisner 		sock_release(*sock);
857b411b363SPhilipp Reisner 		*sock = NULL;
85881e84650SAndreas Gruenbacher 		return false;
859b411b363SPhilipp Reisner 	}
860b411b363SPhilipp Reisner }
8615d0b17f1SPhilipp Reisner 
8625d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8635d0b17f1SPhilipp Reisner 				   struct socket **sock1,
8645d0b17f1SPhilipp Reisner 				   struct socket **sock2)
8655d0b17f1SPhilipp Reisner {
8665d0b17f1SPhilipp Reisner 	struct net_conf *nc;
8675d0b17f1SPhilipp Reisner 	int timeout;
8685d0b17f1SPhilipp Reisner 	bool ok;
8695d0b17f1SPhilipp Reisner 
8705d0b17f1SPhilipp Reisner 	if (!*sock1 || !*sock2)
8715d0b17f1SPhilipp Reisner 		return false;
8725d0b17f1SPhilipp Reisner 
8735d0b17f1SPhilipp Reisner 	rcu_read_lock();
8745d0b17f1SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
8755d0b17f1SPhilipp Reisner 	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8765d0b17f1SPhilipp Reisner 	rcu_read_unlock();
8775d0b17f1SPhilipp Reisner 	schedule_timeout_interruptible(timeout);
8785d0b17f1SPhilipp Reisner 
8795d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock1);
8805d0b17f1SPhilipp Reisner 	ok = drbd_socket_okay(sock2) && ok;
8815d0b17f1SPhilipp Reisner 
8825d0b17f1SPhilipp Reisner 	return ok;
8835d0b17f1SPhilipp Reisner }
8845d0b17f1SPhilipp Reisner 
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* fresh connection: restart sequence numbering */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Pre-protocol-100 peers serialize state changes per connection;
	 * newer ones per device. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* Initial handshake packets, in order; stop at the first failure. */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	/* the flag/timer resets below run regardless of send errors */
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
912b411b363SPhilipp Reisner 
/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* Two channels: "sock" carries data, "msock" carries meta/acks. */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Symmetric connect dance: both peers simultaneously try to connect
	 * out and accept in, until both channels are up. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			/* first outgoing socket becomes data, second meta */
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			/* Classify the incoming socket by its first packet;
			 * a "crossed" packet means both peers connected out
			 * for the same channel, keep the accepted one. */
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* coin flip breaks the symmetry between peers */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* done listening; both channels are established */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	/* meta channel gets higher priority than bulk data */
	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* feature negotiation (agreed_pro_version etc.) */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	/* optional shared-secret challenge/response authentication */
	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* Per-device handshake; drop the RCU lock around drbd_connected()
	 * while holding a kref on the device. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1152b411b363SPhilipp Reisner 
1153bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1154b411b363SPhilipp Reisner {
1155bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
1156b411b363SPhilipp Reisner 
11570c8e36d9SAndreas Gruenbacher 	if (header_size == sizeof(struct p_header100) &&
11580c8e36d9SAndreas Gruenbacher 	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11590c8e36d9SAndreas Gruenbacher 		struct p_header100 *h = header;
11600c8e36d9SAndreas Gruenbacher 		if (h->pad != 0) {
11611ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Header padding is not zero\n");
11620c8e36d9SAndreas Gruenbacher 			return -EINVAL;
116302918be2SPhilipp Reisner 		}
11640c8e36d9SAndreas Gruenbacher 		pi->vnr = be16_to_cpu(h->volume);
11650c8e36d9SAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
11660c8e36d9SAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
11670c8e36d9SAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header95) &&
1168e658983aSAndreas Gruenbacher 		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1169e658983aSAndreas Gruenbacher 		struct p_header95 *h = header;
1170e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1171b55d84baSAndreas Gruenbacher 		pi->size = be32_to_cpu(h->length);
1172eefc2f7dSPhilipp Reisner 		pi->vnr = 0;
1173e658983aSAndreas Gruenbacher 	} else if (header_size == sizeof(struct p_header80) &&
1174e658983aSAndreas Gruenbacher 		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1175e658983aSAndreas Gruenbacher 		struct p_header80 *h = header;
1176e658983aSAndreas Gruenbacher 		pi->cmd = be16_to_cpu(h->command);
1177e658983aSAndreas Gruenbacher 		pi->size = be16_to_cpu(h->length);
117877351055SPhilipp Reisner 		pi->vnr = 0;
117902918be2SPhilipp Reisner 	} else {
11801ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1181e658983aSAndreas Gruenbacher 			 be32_to_cpu(*(__be32 *)header),
1182bde89a9eSAndreas Gruenbacher 			 connection->agreed_pro_version);
11838172f3e9SAndreas Gruenbacher 		return -EINVAL;
1184b411b363SPhilipp Reisner 	}
1185e658983aSAndreas Gruenbacher 	pi->data = header + header_size;
11868172f3e9SAndreas Gruenbacher 	return 0;
1187b411b363SPhilipp Reisner }
1188b411b363SPhilipp Reisner 
1189c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1190c51a0ef3SLars Ellenberg {
1191c51a0ef3SLars Ellenberg 	if (current->plug == &connection->receiver_plug) {
1192c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
1193c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
1194c51a0ef3SLars Ellenberg 	} /* else: maybe just schedule() ?? */
1195c51a0ef3SLars Ellenberg }
1196c51a0ef3SLars Ellenberg 
1197bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1198257d0af6SPhilipp Reisner {
1199bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
120069bc7bc3SAndreas Gruenbacher 	int err;
1201257d0af6SPhilipp Reisner 
1202bde89a9eSAndreas Gruenbacher 	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1203a5c31904SAndreas Gruenbacher 	if (err)
120469bc7bc3SAndreas Gruenbacher 		return err;
1205257d0af6SPhilipp Reisner 
1206bde89a9eSAndreas Gruenbacher 	err = decode_header(connection, buffer, pi);
1207bde89a9eSAndreas Gruenbacher 	connection->last_received = jiffies;
1208b411b363SPhilipp Reisner 
120969bc7bc3SAndreas Gruenbacher 	return err;
1210b411b363SPhilipp Reisner }
1211b411b363SPhilipp Reisner 
/* Like drbd_recv_header(), but try a non-blocking receive first.
 * If no data is waiting, use the idle moment to unplug the backend
 * queues and send a TCP quick-ack before blocking for the header. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	/* Non-blocking attempt; judging by the checks below, drbd_recv_short()
	 * returns the number of bytes received, or a negative errno. */
	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			drbd_tcp_quickack(connection->data.socket);
			drbd_unplug_all_devices(connection);
		}
		/* Partial header received: keep it, then block for the rest. */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
1242f9ff0da5SLars Ellenberg /* This is blkdev_issue_flush, but asynchronous.
1243f9ff0da5SLars Ellenberg  * We want to submit to all component volumes in parallel,
1244f9ff0da5SLars Ellenberg  * then wait for all completions.
1245f9ff0da5SLars Ellenberg  */
struct issue_flush_context {
	/* flush bios still in flight; the submitter holds a +1 bias
	 * until all bios have been issued (see drbd_flush()) */
	atomic_t pending;
	/* any errno recorded by a failed flush completion */
	int error;
	/* completed when pending drops to zero */
	struct completion done;
};
struct one_flush_context {
	/* the volume this single flush bio was issued for */
	struct drbd_device *device;
	/* the shared per-connection flush bookkeeping */
	struct issue_flush_context *ctx;
};
1255f9ff0da5SLars Ellenberg 
12561ffa7bfaSBaoyou Xie static void one_flush_endio(struct bio *bio)
1257f9ff0da5SLars Ellenberg {
1258f9ff0da5SLars Ellenberg 	struct one_flush_context *octx = bio->bi_private;
1259f9ff0da5SLars Ellenberg 	struct drbd_device *device = octx->device;
1260f9ff0da5SLars Ellenberg 	struct issue_flush_context *ctx = octx->ctx;
1261f9ff0da5SLars Ellenberg 
12624e4cbee9SChristoph Hellwig 	if (bio->bi_status) {
12634e4cbee9SChristoph Hellwig 		ctx->error = blk_status_to_errno(bio->bi_status);
12644e4cbee9SChristoph Hellwig 		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1265f9ff0da5SLars Ellenberg 	}
1266f9ff0da5SLars Ellenberg 	kfree(octx);
1267f9ff0da5SLars Ellenberg 	bio_put(bio);
1268f9ff0da5SLars Ellenberg 
1269f9ff0da5SLars Ellenberg 	clear_bit(FLUSH_PENDING, &device->flags);
1270f9ff0da5SLars Ellenberg 	put_ldev(device);
1271f9ff0da5SLars Ellenberg 	kref_put(&device->kref, drbd_destroy_device);
1272f9ff0da5SLars Ellenberg 
1273f9ff0da5SLars Ellenberg 	if (atomic_dec_and_test(&ctx->pending))
1274f9ff0da5SLars Ellenberg 		complete(&ctx->done);
1275f9ff0da5SLars Ellenberg }
1276f9ff0da5SLars Ellenberg 
1277f9ff0da5SLars Ellenberg static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
1278f9ff0da5SLars Ellenberg {
1279f9ff0da5SLars Ellenberg 	struct bio *bio = bio_alloc(GFP_NOIO, 0);
1280f9ff0da5SLars Ellenberg 	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
1281f9ff0da5SLars Ellenberg 	if (!bio || !octx) {
1282f9ff0da5SLars Ellenberg 		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
1283f9ff0da5SLars Ellenberg 		/* FIXME: what else can I do now?  disconnecting or detaching
1284f9ff0da5SLars Ellenberg 		 * really does not help to improve the state of the world, either.
1285f9ff0da5SLars Ellenberg 		 */
1286f9ff0da5SLars Ellenberg 		kfree(octx);
1287f9ff0da5SLars Ellenberg 		if (bio)
1288f9ff0da5SLars Ellenberg 			bio_put(bio);
1289f9ff0da5SLars Ellenberg 
1290f9ff0da5SLars Ellenberg 		ctx->error = -ENOMEM;
1291f9ff0da5SLars Ellenberg 		put_ldev(device);
1292f9ff0da5SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1293f9ff0da5SLars Ellenberg 		return;
1294f9ff0da5SLars Ellenberg 	}
1295f9ff0da5SLars Ellenberg 
1296f9ff0da5SLars Ellenberg 	octx->device = device;
1297f9ff0da5SLars Ellenberg 	octx->ctx = ctx;
129874d46992SChristoph Hellwig 	bio_set_dev(bio, device->ldev->backing_bdev);
1299f9ff0da5SLars Ellenberg 	bio->bi_private = octx;
1300f9ff0da5SLars Ellenberg 	bio->bi_end_io = one_flush_endio;
130170fd7614SChristoph Hellwig 	bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
1302f9ff0da5SLars Ellenberg 
1303f9ff0da5SLars Ellenberg 	device->flush_jif = jiffies;
1304f9ff0da5SLars Ellenberg 	set_bit(FLUSH_PENDING, &device->flags);
1305f9ff0da5SLars Ellenberg 	atomic_inc(&ctx->pending);
1306f9ff0da5SLars Ellenberg 	submit_bio(bio);
1307f9ff0da5SLars Ellenberg }
1308f9ff0da5SLars Ellenberg 
/* Flush the backing devices of all volumes of @connection in parallel,
 * if the current write ordering method requires it (>= WO_BDEV_FLUSH).
 * On any flush failure, fall back to WO_DRAIN_IO write ordering. */
static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		/* start pending at 1 (submitter bias) so completions cannot
		 * fire "done" before all bios have been submitted */
		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			/* drop the rcu lock around submit_one_flush(): it may
			 * sleep (GFP_NOIO allocations); the device is pinned
			 * by the ldev and kref references taken above */
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		/* drop the submitter bias; wait only if flushes are in flight */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			/* Any error is already reported by bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}
1349b411b363SPhilipp Reisner 
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished once it holds at least one request (epoch_size != 0),
 * none of its requests is still active, and either its barrier number has
 * arrived or we are cleaning up (EV_CLEANUP modifier).  A finished epoch is
 * acknowledged to the peer (unless cleaning up) and then freed — or recycled
 * in place when it is the connection's current epoch.
 *
 * Returns FE_STILL_LIVE, FE_DESTROYED or FE_RECYCLED.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is only a modifier */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* temporarily drop the lock to send the barrier ack */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* finished a non-current epoch: unlink and free
				 * it, then continue with its list successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is reset for reuse instead */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1425b411b363SPhilipp Reisner 
14268fe39aacSPhilipp Reisner static enum write_ordering_e
14278fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14288fe39aacSPhilipp Reisner {
14298fe39aacSPhilipp Reisner 	struct disk_conf *dc;
14308fe39aacSPhilipp Reisner 
14318fe39aacSPhilipp Reisner 	dc = rcu_dereference(bdev->disk_conf);
14328fe39aacSPhilipp Reisner 
1433f6ba8636SAndreas Gruenbacher 	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1434f6ba8636SAndreas Gruenbacher 		wo = WO_DRAIN_IO;
1435f6ba8636SAndreas Gruenbacher 	if (wo == WO_DRAIN_IO && !dc->disk_drain)
1436f6ba8636SAndreas Gruenbacher 		wo = WO_NONE;
14378fe39aacSPhilipp Reisner 
14388fe39aacSPhilipp Reisner 	return wo;
14398fe39aacSPhilipp Reisner }
14408fe39aacSPhilipp Reisner 
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	additional backing device to take into account; may be NULL.
 *		If it is already attached to one of @resource's devices, its
 *		limits are only applied once.
 * @wo:		Write ordering method to try.
 *
 * The effective method is capped by what every attached backing device
 * allows (see max_allowed_wo()).  It can only ever be lowered, except
 * that asking for WO_BDEV_FLUSH re-evaluates from the top.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	/* never silently upgrade, unless explicitly asked to retry flush */
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* avoid applying @bdev's limits a second time below */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1480b411b363SPhilipp Reisner 
1481f31e583aSLars Ellenberg /*
1482f31e583aSLars Ellenberg  * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1483f31e583aSLars Ellenberg  * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1484f31e583aSLars Ellenberg  * will directly go to fallback mode, submitting normal writes, and
1485f31e583aSLars Ellenberg  * never even try to UNMAP.
1486f31e583aSLars Ellenberg  *
1487f31e583aSLars Ellenberg  * And dm-thin does not do this (yet), mostly because in general it has
1488f31e583aSLars Ellenberg  * to assume that "skip_block_zeroing" is set.  See also:
1489f31e583aSLars Ellenberg  * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1490f31e583aSLars Ellenberg  * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1491f31e583aSLars Ellenberg  *
1492f31e583aSLars Ellenberg  * We *may* ignore the discard-zeroes-data setting, if so configured.
1493f31e583aSLars Ellenberg  *
1494f31e583aSLars Ellenberg  * Assumption is that this "discard_zeroes_data=0" is only because the backend
1495f31e583aSLars Ellenberg  * may ignore partial unaligned discards.
1496f31e583aSLars Ellenberg  *
1497f31e583aSLars Ellenberg  * LVM/DM thin as of at least
1498f31e583aSLars Ellenberg  *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
1499f31e583aSLars Ellenberg  *   Library version: 1.02.93-RHEL7 (2015-01-28)
1500f31e583aSLars Ellenberg  *   Driver version:  4.29.0
1501f31e583aSLars Ellenberg  * still behaves this way.
1502f31e583aSLars Ellenberg  *
1503f31e583aSLars Ellenberg  * For unaligned (wrt. alignment and granularity) or too small discards,
1504f31e583aSLars Ellenberg  * we zero-out the initial (and/or) trailing unaligned partial chunks,
1505f31e583aSLars Ellenberg  * but discard all the aligned full chunks.
1506f31e583aSLars Ellenberg  *
1507f31e583aSLars Ellenberg  * At least for LVM/DM thin, with skip_block_zeroing=false,
1508f31e583aSLars Ellenberg  * the result is effectively "discard_zeroes_data=1".
1509f31e583aSLars Ellenberg  */
/* flags: EE_TRIM|EE_ZEROOUT
 * Discard the aligned, full-granularity portion of [start, start+nr_sectors)
 * and explicitly zero out any unaligned head/tail — or, depending on @flags,
 * zero out the whole range.  Returns 0 on success, 1 if anything failed. */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* only EE_TRIM (without EE_ZEROOUT) attempts real discards */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* cap individual discard requests at (1 << 22) sectors,
	 * rounded down to a multiple of the granularity */
	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		/* misaligned head: zero out up to the first aligned sector */
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* discard the aligned bulk in max_discard_sectors sized requests */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* zero whatever could not (or was not requested to) be discarded */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
1575f31e583aSLars Ellenberg 
1576f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1577f31e583aSLars Ellenberg {
1578f31e583aSLars Ellenberg 	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1579f31e583aSLars Ellenberg 	struct disk_conf *dc;
1580f31e583aSLars Ellenberg 	bool can_do;
1581f31e583aSLars Ellenberg 
1582f31e583aSLars Ellenberg 	if (!blk_queue_discard(q))
1583f31e583aSLars Ellenberg 		return false;
1584f31e583aSLars Ellenberg 
1585f31e583aSLars Ellenberg 	rcu_read_lock();
1586f31e583aSLars Ellenberg 	dc = rcu_dereference(device->ldev->disk_conf);
1587f31e583aSLars Ellenberg 	can_do = dc->discard_zeroes_if_aligned;
1588f31e583aSLars Ellenberg 	rcu_read_unlock();
1589f31e583aSLars Ellenberg 	return can_do;
1590f31e583aSLars Ellenberg }
1591f31e583aSLars Ellenberg 
1592f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1593f31e583aSLars Ellenberg {
1594f31e583aSLars Ellenberg 	/* If the backend cannot discard, or does not guarantee
1595f31e583aSLars Ellenberg 	 * read-back zeroes in discarded ranges, we fall back to
1596f31e583aSLars Ellenberg 	 * zero-out.  Unless configuration specifically requested
1597f31e583aSLars Ellenberg 	 * otherwise. */
1598f31e583aSLars Ellenberg 	if (!can_do_reliable_discards(device))
1599f31e583aSLars Ellenberg 		peer_req->flags |= EE_ZEROOUT;
1600f31e583aSLars Ellenberg 
1601f31e583aSLars Ellenberg 	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1602f31e583aSLars Ellenberg 	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1603dd4f699dSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
1604dd4f699dSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
1605dd4f699dSLars Ellenberg }
1606dd4f699dSLars Ellenberg 
16079104d31aSLars Ellenberg static void drbd_issue_peer_wsame(struct drbd_device *device,
16089104d31aSLars Ellenberg 				  struct drbd_peer_request *peer_req)
16099104d31aSLars Ellenberg {
16109104d31aSLars Ellenberg 	struct block_device *bdev = device->ldev->backing_bdev;
16119104d31aSLars Ellenberg 	sector_t s = peer_req->i.sector;
16129104d31aSLars Ellenberg 	sector_t nr = peer_req->i.size >> 9;
16139104d31aSLars Ellenberg 	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
16149104d31aSLars Ellenberg 		peer_req->flags |= EE_WAS_ERROR;
16159104d31aSLars Ellenberg 	drbd_endio_write_sec_final(peer_req);
16169104d31aSLars Ellenberg }
16179104d31aSLars Ellenberg 
16189104d31aSLars Ellenberg 
1619b411b363SPhilipp Reisner /**
1620fbe29decSAndreas Gruenbacher  * drbd_submit_peer_request()
1621b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1622db830c46SAndreas Gruenbacher  * @peer_req:	peer request
16231eff9d32SJens Axboe  * @rw:		flag field, see bio->bi_opf
162410f6d992SLars Ellenberg  *
162510f6d992SLars Ellenberg  * May spread the pages to multiple bios,
162610f6d992SLars Ellenberg  * depending on bio_add_page restrictions.
162710f6d992SLars Ellenberg  *
162810f6d992SLars Ellenberg  * Returns 0 if all bios have been submitted,
162910f6d992SLars Ellenberg  * -ENOMEM if we could not allocate enough bios,
163010f6d992SLars Ellenberg  * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
163110f6d992SLars Ellenberg  *  single page to an empty bio (which should never happen and likely indicates
163210f6d992SLars Ellenberg  *  that the lower level IO stack is in some way broken). This has been observed
163310f6d992SLars Ellenberg  *  on certain Xen deployments.
163445bb912bSLars Ellenberg  */
163545bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
1636b30ab791SAndreas Gruenbacher int drbd_submit_peer_request(struct drbd_device *device,
1637fbe29decSAndreas Gruenbacher 			     struct drbd_peer_request *peer_req,
1638bb3cc85eSMike Christie 			     const unsigned op, const unsigned op_flags,
1639bb3cc85eSMike Christie 			     const int fault_type)
164045bb912bSLars Ellenberg {
164145bb912bSLars Ellenberg 	struct bio *bios = NULL;
164245bb912bSLars Ellenberg 	struct bio *bio;
1643db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
1644db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
164511f8b2b6SAndreas Gruenbacher 	unsigned data_size = peer_req->i.size;
164645bb912bSLars Ellenberg 	unsigned n_bios = 0;
164711f8b2b6SAndreas Gruenbacher 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
164810f6d992SLars Ellenberg 	int err = -ENOMEM;
164945bb912bSLars Ellenberg 
1650dd4f699dSLars Ellenberg 	/* TRIM/DISCARD: for now, always use the helper function
1651dd4f699dSLars Ellenberg 	 * blkdev_issue_zeroout(..., discard=true).
1652dd4f699dSLars Ellenberg 	 * It's synchronous, but it does the right thing wrt. bio splitting.
1653dd4f699dSLars Ellenberg 	 * Correctness first, performance later.  Next step is to code an
1654dd4f699dSLars Ellenberg 	 * asynchronous variant of the same.
1655dd4f699dSLars Ellenberg 	 */
1656f31e583aSLars Ellenberg 	if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
1657a0fb3c47SLars Ellenberg 		/* wait for all pending IO completions, before we start
1658a0fb3c47SLars Ellenberg 		 * zeroing things out. */
16595dd2ca19SAndreas Gruenbacher 		conn_wait_active_ee_empty(peer_req->peer_device->connection);
166045d2933cSLars Ellenberg 		/* add it to the active list now,
166145d2933cSLars Ellenberg 		 * so we can find it to present it in debugfs */
166221ae5d7fSLars Ellenberg 		peer_req->submit_jif = jiffies;
166321ae5d7fSLars Ellenberg 		peer_req->flags |= EE_SUBMITTED;
1664700ca8c0SPhilipp Reisner 
1665700ca8c0SPhilipp Reisner 		/* If this was a resync request from receive_rs_deallocated(),
1666700ca8c0SPhilipp Reisner 		 * it is already on the sync_ee list */
1667700ca8c0SPhilipp Reisner 		if (list_empty(&peer_req->w.list)) {
166845d2933cSLars Ellenberg 			spin_lock_irq(&device->resource->req_lock);
166945d2933cSLars Ellenberg 			list_add_tail(&peer_req->w.list, &device->active_ee);
167045d2933cSLars Ellenberg 			spin_unlock_irq(&device->resource->req_lock);
1671700ca8c0SPhilipp Reisner 		}
1672700ca8c0SPhilipp Reisner 
1673f31e583aSLars Ellenberg 		if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
1674f31e583aSLars Ellenberg 			drbd_issue_peer_discard_or_zero_out(device, peer_req);
16759104d31aSLars Ellenberg 		else /* EE_WRITE_SAME */
16769104d31aSLars Ellenberg 			drbd_issue_peer_wsame(device, peer_req);
1677a0fb3c47SLars Ellenberg 		return 0;
1678a0fb3c47SLars Ellenberg 	}
1679a0fb3c47SLars Ellenberg 
168045bb912bSLars Ellenberg 	/* In most cases, we will only need one bio.  But in case the lower
168145bb912bSLars Ellenberg 	 * level restrictions happen to be different at this offset on this
168245bb912bSLars Ellenberg 	 * side than those of the sending peer, we may need to submit the
16839476f39dSLars Ellenberg 	 * request in more than one bio.
16849476f39dSLars Ellenberg 	 *
16859476f39dSLars Ellenberg 	 * Plain bio_alloc is good enough here, this is no DRBD internally
16869476f39dSLars Ellenberg 	 * generated bio, but a bio allocated on behalf of the peer.
16879476f39dSLars Ellenberg 	 */
168845bb912bSLars Ellenberg next_bio:
168945bb912bSLars Ellenberg 	bio = bio_alloc(GFP_NOIO, nr_pages);
169045bb912bSLars Ellenberg 	if (!bio) {
1691a0fb3c47SLars Ellenberg 		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
169245bb912bSLars Ellenberg 		goto fail;
169345bb912bSLars Ellenberg 	}
1694db830c46SAndreas Gruenbacher 	/* > peer_req->i.sector, unless this is the first bio */
16954f024f37SKent Overstreet 	bio->bi_iter.bi_sector = sector;
169674d46992SChristoph Hellwig 	bio_set_dev(bio, device->ldev->backing_bdev);
1697bb3cc85eSMike Christie 	bio_set_op_attrs(bio, op, op_flags);
1698db830c46SAndreas Gruenbacher 	bio->bi_private = peer_req;
1699fcefa62eSAndreas Gruenbacher 	bio->bi_end_io = drbd_peer_request_endio;
170045bb912bSLars Ellenberg 
170145bb912bSLars Ellenberg 	bio->bi_next = bios;
170245bb912bSLars Ellenberg 	bios = bio;
170345bb912bSLars Ellenberg 	++n_bios;
170445bb912bSLars Ellenberg 
170545bb912bSLars Ellenberg 	page_chain_for_each(page) {
170611f8b2b6SAndreas Gruenbacher 		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
170706efffdaSMing Lei 		if (!bio_add_page(bio, page, len, 0))
170845bb912bSLars Ellenberg 			goto next_bio;
170911f8b2b6SAndreas Gruenbacher 		data_size -= len;
171045bb912bSLars Ellenberg 		sector += len >> 9;
171145bb912bSLars Ellenberg 		--nr_pages;
171245bb912bSLars Ellenberg 	}
171311f8b2b6SAndreas Gruenbacher 	D_ASSERT(device, data_size == 0);
1714a0fb3c47SLars Ellenberg 	D_ASSERT(device, page == NULL);
171545bb912bSLars Ellenberg 
1716db830c46SAndreas Gruenbacher 	atomic_set(&peer_req->pending_bios, n_bios);
171721ae5d7fSLars Ellenberg 	/* for debugfs: update timestamp, mark as submitted */
171821ae5d7fSLars Ellenberg 	peer_req->submit_jif = jiffies;
171921ae5d7fSLars Ellenberg 	peer_req->flags |= EE_SUBMITTED;
172045bb912bSLars Ellenberg 	do {
172145bb912bSLars Ellenberg 		bio = bios;
172245bb912bSLars Ellenberg 		bios = bios->bi_next;
172345bb912bSLars Ellenberg 		bio->bi_next = NULL;
172445bb912bSLars Ellenberg 
1725b30ab791SAndreas Gruenbacher 		drbd_generic_make_request(device, fault_type, bio);
172645bb912bSLars Ellenberg 	} while (bios);
172745bb912bSLars Ellenberg 	return 0;
172845bb912bSLars Ellenberg 
172945bb912bSLars Ellenberg fail:
173045bb912bSLars Ellenberg 	while (bios) {
173145bb912bSLars Ellenberg 		bio = bios;
173245bb912bSLars Ellenberg 		bios = bios->bi_next;
173345bb912bSLars Ellenberg 		bio_put(bio);
173445bb912bSLars Ellenberg 	}
173510f6d992SLars Ellenberg 	return err;
173645bb912bSLars Ellenberg }
173745bb912bSLars Ellenberg 
1738b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1739db830c46SAndreas Gruenbacher 					     struct drbd_peer_request *peer_req)
174053840641SAndreas Gruenbacher {
1741db830c46SAndreas Gruenbacher 	struct drbd_interval *i = &peer_req->i;
174253840641SAndreas Gruenbacher 
1743b30ab791SAndreas Gruenbacher 	drbd_remove_interval(&device->write_requests, i);
174453840641SAndreas Gruenbacher 	drbd_clear_interval(i);
174553840641SAndreas Gruenbacher 
17466c852becSAndreas Gruenbacher 	/* Wake up any processes waiting for this peer request to complete.  */
174753840641SAndreas Gruenbacher 	if (i->waiting)
1748b30ab791SAndreas Gruenbacher 		wake_up(&device->misc_wait);
174953840641SAndreas Gruenbacher }
175053840641SAndreas Gruenbacher 
/* Wait until every volume (device) on this connection has completed all
 * peer write requests currently on its active_ee list.
 *
 * drbd_wait_ee_list_empty() may sleep, which is not allowed inside an
 * RCU read-side critical section; therefore each device is pinned with
 * kref_get() before dropping rcu_read_lock(), and the lock is re-taken
 * afterwards to continue the idr walk safely.
 */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* Pin the device so it cannot go away while we sleep
		 * outside the RCU read side. */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
176877fede51SPhilipp Reisner 
/* Handle an incoming P_BARRIER packet: finish the current write epoch
 * and (usually) start a new one.  Whether we must drain and flush
 * in-flight writes first depends on the configured write ordering.
 * Returns 0 on success, -EIO on an invalid write-ordering setting.
 */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		/* FE_RECYCLED: the epoch was already reused, nothing to do. */
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* Drain all active peer writes and flush the backing devices
		 * before the barrier may be acknowledged. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* Only bother allocating a new epoch if writes arrived in
		 * the current one; otherwise it can simply be reused. */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* A fresh epoch was allocated above; initialize and install it. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		/* Writes went into the current epoch meanwhile: chain the new
		 * epoch behind it and make it current. */
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1836b411b363SPhilipp Reisner 
18379104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
18383d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18399104d31aSLars Ellenberg 			      struct drbd_peer_request *r, void *d,
18409104d31aSLars Ellenberg 			      unsigned int payload_size)
18419104d31aSLars Ellenberg {
18429104d31aSLars Ellenberg 	unsigned int tmp = r->i.size;
18439104d31aSLars Ellenberg 	r->i.size = payload_size;
18449104d31aSLars Ellenberg 	drbd_csum_ee(h, r, d);
18459104d31aSLars Ellenberg 	r->i.size = tmp;
18469104d31aSLars Ellenberg }
18479104d31aSLars Ellenberg 
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * data_size: actual payload ("data in")
 * 	for normal writes that is bi_size.
 * 	for discards, that is zero.
 * 	for write same, it is logical_block_size.
 * both trim and write same have the bi_size ("data len to be affected")
 * as extra argument in the packet header.
 *
 * Allocates a peer request, receives the payload (if any) from the
 * socket into its page chain, and verifies the integrity digest when
 * one is configured.  Returns the peer request, or NULL on any error
 * (allocation, short read, out-of-range request, digest mismatch).
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;

	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		/* The digest is transmitted ahead of the payload; receive it
		 * first and account for it in data_size. */
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim and wsame. */
	ds = data_size;
	if (trim) {
		/* trim carries no payload; the affected length is in the header */
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (zeroes) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(zeroes->size);
	} else if (wsame) {
		/* write-same payload must be exactly one logical block, both
		 * from DRBD's and the backend's point of view */
		if (data_size != queue_logical_block_size(device->rq_queue)) {
			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
				data_size, queue_logical_block_size(device->rq_queue));
			return NULL;
		}
		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
			return NULL;
		}
		ds = be32_to_cpu(wsame->size);
	}

	/* Sanity-check the affected size: sector aligned and within limits. */
	if (!expect(IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || wsame || zeroes) {
		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim) {
		/* no payload to receive for trim/zeroes; done here */
		peer_req->flags |= EE_TRIM;
		return peer_req;
	}
	if (zeroes) {
		peer_req->flags |= EE_ZEROOUT;
		return peer_req;
	}
	if (wsame)
		peer_req->flags |= EE_WRITE_SAME;

	/* receive payload size bytes into page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			/* fault injection for testing: flip bits in the
			 * first word of each received page */
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (digest_size) {
		/* verify the received payload against the transmitted digest */
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1979b411b363SPhilipp Reisner 
1980b411b363SPhilipp Reisner /* drbd_drain_block() just takes a data block
1981b411b363SPhilipp Reisner  * out of the socket input buffer, and discards it.
1982b411b363SPhilipp Reisner  */
198369a22773SAndreas Gruenbacher static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
1984b411b363SPhilipp Reisner {
1985b411b363SPhilipp Reisner 	struct page *page;
1986a5c31904SAndreas Gruenbacher 	int err = 0;
1987b411b363SPhilipp Reisner 	void *data;
1988b411b363SPhilipp Reisner 
1989c3470cdeSLars Ellenberg 	if (!data_size)
1990fc5be839SAndreas Gruenbacher 		return 0;
1991c3470cdeSLars Ellenberg 
199269a22773SAndreas Gruenbacher 	page = drbd_alloc_pages(peer_device, 1, 1);
1993b411b363SPhilipp Reisner 
1994b411b363SPhilipp Reisner 	data = kmap(page);
1995b411b363SPhilipp Reisner 	while (data_size) {
1996fc5be839SAndreas Gruenbacher 		unsigned int len = min_t(int, data_size, PAGE_SIZE);
1997fc5be839SAndreas Gruenbacher 
199869a22773SAndreas Gruenbacher 		err = drbd_recv_all_warn(peer_device->connection, data, len);
1999a5c31904SAndreas Gruenbacher 		if (err)
2000b411b363SPhilipp Reisner 			break;
2001a5c31904SAndreas Gruenbacher 		data_size -= len;
2002b411b363SPhilipp Reisner 	}
2003b411b363SPhilipp Reisner 	kunmap(page);
200469a22773SAndreas Gruenbacher 	drbd_free_pages(peer_device->device, page, 0);
2005fc5be839SAndreas Gruenbacher 	return err;
2006b411b363SPhilipp Reisner }
2007b411b363SPhilipp Reisner 
/* Receive a "disk-less" read reply: copy the payload from the socket
 * directly into the pages of the original request's master bio, then
 * verify the integrity digest if one is configured.
 * Returns 0 on success or a negative error code.
 */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		/* the digest precedes the payload on the wire */
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill the bio's segments straight from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		/* checksum what we actually placed in the bio and compare */
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
2055b411b363SPhilipp Reisner 
/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: on success mark the range in
 * sync and ack it; on I/O error record the failed range and send a
 * negative ack.  Either way the unacked counter taken in
 * recv_resync_read() is dropped here.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* resync requests never enter the conflict-detection tree */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}
2084b411b363SPhilipp Reisner 
/* Receive resync data from the socket and submit it as a local write.
 * On any failure the peer request is unwound and the local-disk
 * reference (taken by the caller via get_ldev()) is released; on the
 * success path that reference is released from the bio endio path.
 * Returns 0 on success, -EIO on error.
 */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	/* queue on sync_ee before submitting, so completion can find it */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* account the incoming sectors as resync activity */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
2124b411b363SPhilipp Reisner 
2125668eebc6SAndreas Gruenbacher static struct drbd_request *
2126b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2127bc9c5c41SAndreas Gruenbacher 	     sector_t sector, bool missing_ok, const char *func)
2128b411b363SPhilipp Reisner {
2129b411b363SPhilipp Reisner 	struct drbd_request *req;
2130668eebc6SAndreas Gruenbacher 
2131bc9c5c41SAndreas Gruenbacher 	/* Request object according to our peer */
2132bc9c5c41SAndreas Gruenbacher 	req = (struct drbd_request *)(unsigned long)id;
21335e472264SAndreas Gruenbacher 	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2134668eebc6SAndreas Gruenbacher 		return req;
2135c3afd8f5SAndreas Gruenbacher 	if (!missing_ok) {
2136d0180171SAndreas Gruenbacher 		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2137c3afd8f5SAndreas Gruenbacher 			(unsigned long)id, (unsigned long long)sector);
2138c3afd8f5SAndreas Gruenbacher 	}
2139668eebc6SAndreas Gruenbacher 	return NULL;
2140668eebc6SAndreas Gruenbacher }
2141668eebc6SAndreas Gruenbacher 
/* Handle P_DATA_REPLY: the peer answers one of our application reads.
 * Locate the originating request via its echoed block_id, copy the
 * payload into its master bio, and advance the request state machine.
 * Returns 0 on success, -EIO on protocol/lookup/receive errors.
 */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* lookup under req_lock; the tree may change concurrently */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
2176b411b363SPhilipp Reisner 
/* Handle P_RS_DATA_REPLY: the peer answers one of our resync read
 * requests.  If we have a local disk, write the block to it via
 * recv_resync_read(); otherwise drain the payload from the socket and
 * send a negative ack so the stream stays in sync.
 */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies always carry the ID_SYNCER block id */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account incoming resync sectors for the resync rate controller */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
2211b411b363SPhilipp Reisner 
2212b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device,
22137be8da07SAndreas Gruenbacher 				       sector_t sector, int size)
2214b411b363SPhilipp Reisner {
22157be8da07SAndreas Gruenbacher 	struct drbd_interval *i;
22167be8da07SAndreas Gruenbacher 	struct drbd_request *req;
2217b411b363SPhilipp Reisner 
2218b30ab791SAndreas Gruenbacher 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
22197be8da07SAndreas Gruenbacher 		if (!i->local)
22207be8da07SAndreas Gruenbacher 			continue;
22217be8da07SAndreas Gruenbacher 		req = container_of(i, struct drbd_request, i);
22227be8da07SAndreas Gruenbacher 		if (req->rq_state & RQ_LOCAL_PENDING ||
22237be8da07SAndreas Gruenbacher 		    !(req->rq_state & RQ_POSTPONED))
22247be8da07SAndreas Gruenbacher 			continue;
22252312f0b3SLars Ellenberg 		/* as it is RQ_POSTPONED, this will cause it to
22262312f0b3SLars Ellenberg 		 * be queued on the retry workqueue. */
2227d4dabbe2SLars Ellenberg 		__req_mod(req, CONFLICT_RESOLVED, NULL);
22287be8da07SAndreas Gruenbacher 	}
22297be8da07SAndreas Gruenbacher }
22307be8da07SAndreas Gruenbacher 
2231a990be46SAndreas Gruenbacher /*
2232668700b4SPhilipp Reisner  * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
2233b411b363SPhilipp Reisner  */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	/* Acknowledge the write if the protocol asked for it; the flag is
	 * set in receive_Data() together with the inc_unacked() that the
	 * dec_unacked() below balances. */
	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While in a resync state, a block that may be set
			 * in sync gets P_RS_WRITE_ACK, otherwise a plain
			 * P_WRITE_ACK. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			/* Local I/O failed: send a negative ack instead. */
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?  */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	/* Drop the epoch reference taken in receive_Data(); add EV_CLEANUP
	 * when the work was cancelled. */
	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2276b411b363SPhilipp Reisner 
2277a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2278b411b363SPhilipp Reisner {
22798050e6d0SAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2280a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2281a8cd15baSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
228299920dc5SAndreas Gruenbacher 	int err;
2283b411b363SPhilipp Reisner 
2284a8cd15baSAndreas Gruenbacher 	err = drbd_send_ack(peer_device, ack, peer_req);
2285a8cd15baSAndreas Gruenbacher 	dec_unacked(peer_device->device);
2286b411b363SPhilipp Reisner 
228799920dc5SAndreas Gruenbacher 	return err;
2288b411b363SPhilipp Reisner }
2289b411b363SPhilipp Reisner 
/* Ack a peer write that conflict resolution decided is superseded
 * (see handle_write_conflicts()). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
2294b6a370baSPhilipp Reisner 
229599920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22967be8da07SAndreas Gruenbacher {
2297a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req =
2298a8cd15baSAndreas Gruenbacher 		container_of(w, struct drbd_peer_request, w);
2299a8cd15baSAndreas Gruenbacher 	struct drbd_connection *connection = peer_req->peer_device->connection;
23007be8da07SAndreas Gruenbacher 
2301a8cd15baSAndreas Gruenbacher 	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2302d4dabbe2SLars Ellenberg 			     P_RETRY_WRITE : P_SUPERSEDED);
23037be8da07SAndreas Gruenbacher }
23047be8da07SAndreas Gruenbacher 
23053e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
23063e394da1SAndreas Gruenbacher {
23073e394da1SAndreas Gruenbacher 	/*
23083e394da1SAndreas Gruenbacher 	 * We assume 32-bit wrap-around here.
23093e394da1SAndreas Gruenbacher 	 * For 24-bit wrap-around, we would have to shift:
23103e394da1SAndreas Gruenbacher 	 *  a <<= 8; b <<= 8;
23113e394da1SAndreas Gruenbacher 	 */
23123e394da1SAndreas Gruenbacher 	return (s32)a - (s32)b > 0;
23133e394da1SAndreas Gruenbacher }
23143e394da1SAndreas Gruenbacher 
23153e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b)
23163e394da1SAndreas Gruenbacher {
23173e394da1SAndreas Gruenbacher 	return seq_greater(a, b) ? a : b;
23183e394da1SAndreas Gruenbacher }
23193e394da1SAndreas Gruenbacher 
232069a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
23213e394da1SAndreas Gruenbacher {
232269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
23233c13b680SLars Ellenberg 	unsigned int newest_peer_seq;
23243e394da1SAndreas Gruenbacher 
232569a22773SAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2326b30ab791SAndreas Gruenbacher 		spin_lock(&device->peer_seq_lock);
2327b30ab791SAndreas Gruenbacher 		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2328b30ab791SAndreas Gruenbacher 		device->peer_seq = newest_peer_seq;
2329b30ab791SAndreas Gruenbacher 		spin_unlock(&device->peer_seq_lock);
2330b30ab791SAndreas Gruenbacher 		/* wake up only if we actually changed device->peer_seq */
23313c13b680SLars Ellenberg 		if (peer_seq == newest_peer_seq)
2332b30ab791SAndreas Gruenbacher 			wake_up(&device->seq_wait);
23333e394da1SAndreas Gruenbacher 	}
23347be8da07SAndreas Gruenbacher }
23353e394da1SAndreas Gruenbacher 
2336d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2337d93f6302SLars Ellenberg {
2338d93f6302SLars Ellenberg 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2339d93f6302SLars Ellenberg }
2340d93f6302SLars Ellenberg 
2341d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
2342b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2343d93f6302SLars Ellenberg {
2344d93f6302SLars Ellenberg 	struct drbd_peer_request *rs_req;
23457e5fec31SFabian Frederick 	bool rv = false;
2346b6a370baSPhilipp Reisner 
23470500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
2348a8cd15baSAndreas Gruenbacher 	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2349d93f6302SLars Ellenberg 		if (overlaps(peer_req->i.sector, peer_req->i.size,
2350d93f6302SLars Ellenberg 			     rs_req->i.sector, rs_req->i.size)) {
23517e5fec31SFabian Frederick 			rv = true;
2352b6a370baSPhilipp Reisner 			break;
2353b6a370baSPhilipp Reisner 		}
2354b6a370baSPhilipp Reisner 	}
23550500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
2356b6a370baSPhilipp Reisner 
2357b6a370baSPhilipp Reisner 	return rv;
2358b6a370baSPhilipp Reisner }
2359b6a370baSPhilipp Reisner 
2360b411b363SPhilipp Reisner /* Called from receive_Data.
2361b411b363SPhilipp Reisner  * Synchronize packets on sock with packets on msock.
2362b411b363SPhilipp Reisner  *
2363b411b363SPhilipp Reisner  * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2364b411b363SPhilipp Reisner  * packet traveling on msock, they are still processed in the order they have
2365b411b363SPhilipp Reisner  * been sent.
2366b411b363SPhilipp Reisner  *
2367b411b363SPhilipp Reisner  * Note: we don't care for Ack packets overtaking P_DATA packets.
2368b411b363SPhilipp Reisner  *
2369b30ab791SAndreas Gruenbacher  * In case packet_seq is larger than device->peer_seq number, there are
2370b411b363SPhilipp Reisner  * outstanding packets on the msock. We wait for them to arrive.
2371b30ab791SAndreas Gruenbacher  * In case we are the logically next packet, we update device->peer_seq
2372b411b363SPhilipp Reisner  * ourselves. Correctly handles 32bit wrap around.
2373b411b363SPhilipp Reisner  *
2374b411b363SPhilipp Reisner  * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2375b411b363SPhilipp Reisner  * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2376b411b363SPhilipp Reisner  * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2377b411b363SPhilipp Reisner  * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2378b411b363SPhilipp Reisner  *
2379b411b363SPhilipp Reisner  * returns 0 if we may process the packet,
2380b411b363SPhilipp Reisner  * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence ordering only matters when the peer resolves write
	 * conflicts for us. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* Packet is the logically next one (or older): record the
		 * newest sequence number seen and let it be processed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* Sleep until update_peer_seq() wakes us or the ping
		 * timeout expires; re-take the lock before re-checking. */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2428b411b363SPhilipp Reisner 
2429688593c5SLars Ellenberg /* see also bio_flags_to_wire()
2430688593c5SLars Ellenberg  * DRBD_REQ_*, because we need to semantically map the flags to data packet
2431688593c5SLars Ellenberg  * flags and back. We may replicate to other kernel versions. */
2432bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf)
243376d2e7ecSPhilipp Reisner {
243476d2e7ecSPhilipp Reisner 	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
243576d2e7ecSPhilipp Reisner 		(dpf & DP_FUA ? REQ_FUA : 0) |
243628a8f0d3SMike Christie 		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2437bb3cc85eSMike Christie }
2438bb3cc85eSMike Christie 
2439bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf)
2440bb3cc85eSMike Christie {
2441f31e583aSLars Ellenberg 	if (dpf & DP_ZEROES)
244245c21793SChristoph Hellwig 		return REQ_OP_WRITE_ZEROES;
2443f31e583aSLars Ellenberg 	if (dpf & DP_DISCARD)
2444f31e583aSLars Ellenberg 		return REQ_OP_DISCARD;
2445f31e583aSLars Ellenberg 	if (dpf & DP_WSAME)
2446f31e583aSLars Ellenberg 		return REQ_OP_WRITE_SAME;
2447bb3cc85eSMike Christie 	else
2448bb3cc85eSMike Christie 		return REQ_OP_WRITE;
244976d2e7ecSPhilipp Reisner }
245076d2e7ecSPhilipp Reisner 
/* Negatively complete every RQ_POSTPONED local request overlapping
 * [sector, sector + size).  Called with req_lock held; the lock is
 * dropped around complete_master_bio(), which invalidates the interval
 * tree walk — hence the restart via "repeat" after every hit. */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
24757be8da07SAndreas Gruenbacher 
/*
 * Insert the peer write into the write_requests interval tree and deal
 * with all overlapping (conflicting) requests.
 *
 * Called from receive_Data() with req_lock held; drbd_wait_misc()
 * presumably releases it while sleeping (TODO confirm), which is why
 * the tree walk is restarted via "repeat" after each wait.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it was
 * queued on done_ee as superseded / to-be-retried (the caller must not
 * submit it), or a negative error from drbd_wait_misc().  On any
 * nonzero return the interval is removed from the tree again.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Queue the appropriate ack on done_ee; the ack
			 * sender will deliver it to the peer. */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25837be8da07SAndreas Gruenbacher 
2584b411b363SPhilipp Reisner /* mirrored write */
/*
 * Receive a write (P_DATA / P_TRIM / P_ZEROES) from the peer and submit
 * it to our local disk: enforces peer-sequence ordering, accounts the
 * request in the current write-ordering epoch, sends the protocol B/C
 * receive/write acks, and in dual-primary setups resolves conflicts
 * with overlapping local writes.
 * Returns 0 if the packet was fully processed, a negative error
 * otherwise.
 */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int op, op_flags;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* No usable local disk: still honor the sequence ordering, send a
	 * negative ack and drain the payload to keep the stream in sync. */
	if (!get_ldev(device)) {
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	/* e_end_block() sends the ack and releases the epoch reference
	 * once the local write has completed. */
	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	op = wire_flags_to_bio_op(dp_flags);
	op_flags = wire_flags_to_bio_flags(dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* A payload-less P_DATA must be a flush request. */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Assign the request to the current write-ordering epoch and take
	 * references on it (released via drbd_may_finish_epoch()). */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	/* Pre-protocol-100 peers do not send the ack-mode flags; derive
	 * them from the configured wire protocol (B/C). */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			/* -ENOENT: the request was superseded / queued for
			 * retry by handle_write_conflicts(); nothing to
			 * submit, but not an error either. */
			if (err == -ENOENT) {
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and WRITE_SAME are processed synchronously,
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* As sync target, hold back this write until it no longer overlaps
	 * a queued resync write (see overlapping_resync_write()). */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, op, op_flags,
				       DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2754b411b363SPhilipp Reisner 
27550f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27560f0601f4SLars Ellenberg  * and current sync rate is above c_min_rate.
27570f0601f4SLars Ellenberg  *
27580f0601f4SLars Ellenberg  * To decide whether or not the lower device is busy, we use a scheme similar
27590f0601f4SLars Ellenberg  * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27600f0601f4SLars Ellenberg  * (more than 64 sectors) of activity we cannot account for with our own resync
27610f0601f4SLars Ellenberg  * activity, it obviously is "busy".
27620f0601f4SLars Ellenberg  *
27630f0601f4SLars Ellenberg  * The current sync rate used here uses only the most recent two step marks,
27640f0601f4SLars Ellenberg  * to have a short time average so we can react faster.
27650f0601f4SLars Ellenberg  */
2766ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2767ad3fee79SLars Ellenberg 		bool throttle_if_app_is_waiting)
2768e8299874SLars Ellenberg {
2769e8299874SLars Ellenberg 	struct lc_element *tmp;
2770ad3fee79SLars Ellenberg 	bool throttle = drbd_rs_c_min_rate_throttle(device);
2771e8299874SLars Ellenberg 
2772ad3fee79SLars Ellenberg 	if (!throttle || throttle_if_app_is_waiting)
2773ad3fee79SLars Ellenberg 		return throttle;
2774e8299874SLars Ellenberg 
2775e8299874SLars Ellenberg 	spin_lock_irq(&device->al_lock);
2776e8299874SLars Ellenberg 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2777e8299874SLars Ellenberg 	if (tmp) {
2778e8299874SLars Ellenberg 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2779e8299874SLars Ellenberg 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
2780e8299874SLars Ellenberg 			throttle = false;
2781ad3fee79SLars Ellenberg 		/* Do not slow down if app IO is already waiting for this extent,
2782ad3fee79SLars Ellenberg 		 * and our progress is necessary for application IO to complete. */
2783e8299874SLars Ellenberg 	}
2784e8299874SLars Ellenberg 	spin_unlock_irq(&device->al_lock);
2785e8299874SLars Ellenberg 
2786e8299874SLars Ellenberg 	return throttle;
2787e8299874SLars Ellenberg }
2788e8299874SLars Ellenberg 
2789e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27900f0601f4SLars Ellenberg {
2791b30ab791SAndreas Gruenbacher 	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
27920f0601f4SLars Ellenberg 	unsigned long db, dt, dbdt;
2793daeda1ccSPhilipp Reisner 	unsigned int c_min_rate;
2794e8299874SLars Ellenberg 	int curr_events;
2795daeda1ccSPhilipp Reisner 
2796daeda1ccSPhilipp Reisner 	rcu_read_lock();
2797b30ab791SAndreas Gruenbacher 	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2798daeda1ccSPhilipp Reisner 	rcu_read_unlock();
27990f0601f4SLars Ellenberg 
28000f0601f4SLars Ellenberg 	/* feature disabled? */
2801daeda1ccSPhilipp Reisner 	if (c_min_rate == 0)
2802e8299874SLars Ellenberg 		return false;
2803e3555d85SPhilipp Reisner 
280459767fbdSMichael Callahan 	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
2805b30ab791SAndreas Gruenbacher 			atomic_read(&device->rs_sect_ev);
2806ad3fee79SLars Ellenberg 
2807ad3fee79SLars Ellenberg 	if (atomic_read(&device->ap_actlog_cnt)
2808ff8bd88bSLars Ellenberg 	    || curr_events - device->rs_last_events > 64) {
28090f0601f4SLars Ellenberg 		unsigned long rs_left;
28100f0601f4SLars Ellenberg 		int i;
28110f0601f4SLars Ellenberg 
2812b30ab791SAndreas Gruenbacher 		device->rs_last_events = curr_events;
28130f0601f4SLars Ellenberg 
28140f0601f4SLars Ellenberg 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
28150f0601f4SLars Ellenberg 		 * approx. */
2816b30ab791SAndreas Gruenbacher 		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
28172649f080SLars Ellenberg 
2818b30ab791SAndreas Gruenbacher 		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2819b30ab791SAndreas Gruenbacher 			rs_left = device->ov_left;
28202649f080SLars Ellenberg 		else
2821b30ab791SAndreas Gruenbacher 			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
28220f0601f4SLars Ellenberg 
2823b30ab791SAndreas Gruenbacher 		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
28240f0601f4SLars Ellenberg 		if (!dt)
28250f0601f4SLars Ellenberg 			dt++;
2826b30ab791SAndreas Gruenbacher 		db = device->rs_mark_left[i] - rs_left;
28270f0601f4SLars Ellenberg 		dbdt = Bit2KB(db/dt);
28280f0601f4SLars Ellenberg 
2829daeda1ccSPhilipp Reisner 		if (dbdt > c_min_rate)
2830e8299874SLars Ellenberg 			return true;
28310f0601f4SLars Ellenberg 	}
2832e8299874SLars Ellenberg 	return false;
28330f0601f4SLars Ellenberg }
28340f0601f4SLars Ellenberg 
/*
 * receive_DataRequest() - handle a peer packet asking us to read a local block
 *
 * Services P_DATA_REQUEST (application read on behalf of the peer), the
 * resync read requests P_RS_DATA_REQUEST / P_RS_THIN_REQ / P_CSUM_RS_REQUEST,
 * and the online-verify packets P_OV_REQUEST / P_OV_REPLY.  The requested
 * block is submitted for asynchronous read from our backing device; the
 * work callback assigned to peer_req->w.cb (one of the w_e_end_* handlers)
 * takes over once the read completes.
 *
 * Returns 0 on success; -EINVAL for malformed requests, -ENOMEM on
 * allocation failure, -EIO otherwise.  A negative return makes the caller
 * tear down the connection.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Reject nonsensical sizes: non-positive, not 512-byte aligned,
	 * or larger than a single DRBD bio may be. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* Reject reads that would run past the end of the device. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable (D_UP_TO_DATE) local data: send the matching negative
	 * acknowledgement for this packet type, then drain the socket. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_THIN_REQ:
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			/* An OV reply answers a request we sent earlier,
			 * so it is accounted in rs_pending; no error log. */
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possible payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Pick the completion callback and fault-injection type per packet. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_THIN_REQ:
		/* If at some point in the future we have a smart way to
		   find out if this data block is completely deallocated,
		   then we would do something smarter here than reading
		   the block... */
		peer_req->flags |= EE_RS_THIN_REQ;
		/* fall through */
	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a checksum digest as payload; receive it into
		 * a digest_info attached to the peer request. */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First OV request with an agreed protocol >= 90 initializes
		 * the online-verify progress bookkeeping. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	&& drbd_rs_should_slow_down(device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
3047b411b363SPhilipp Reisner 
/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 * @peer_device: DRBD peer device of the connection on which the
 *	split-brain was detected.
 *
 * Applies the configured after-sb-0pri policy.
 *
 * Return: 1 to discard the peer's data (we become sync source), -1 to
 * discard our own data (we become sync target), -100 if no automatic
 * resolution was possible (disconnect).
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* NOTE(review): the low bit of the bitmap UUID appears to encode
	 * whether the respective node was primary; used by the
	 * younger/older-primary strategies below — confirm against the
	 * UUID handling in drbd_main.c. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* "Changed" block counts; the peer transmits its count in the
	 * UI_SIZE slot of the uuid array. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* These policies require at least one primary; with zero
		 * primaries left they indicate a configuration error. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through - to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Both unchanged: break the tie via the
			 * RESOLVE_CONFLICTS flag. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* Only stop here if zero-changes was the configured policy;
		 * when we fell through from younger/older-primary, continue
		 * with least-changes. */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* else, fall through */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
3130b411b363SPhilipp Reisner 
313169a22773SAndreas Gruenbacher /**
313269a22773SAndreas Gruenbacher  * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
313369a22773SAndreas Gruenbacher  */
313469a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3135b411b363SPhilipp Reisner {
313669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31376184ea21SAndreas Gruenbacher 	int hg, rv = -100;
313844ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_1p;
3139b411b363SPhilipp Reisner 
314044ed167dSPhilipp Reisner 	rcu_read_lock();
314169a22773SAndreas Gruenbacher 	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
314244ed167dSPhilipp Reisner 	rcu_read_unlock();
314344ed167dSPhilipp Reisner 	switch (after_sb_1p) {
3144b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3145b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3146b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3147b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3148b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
314944ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3150d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3151b411b363SPhilipp Reisner 		break;
3152b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3153b411b363SPhilipp Reisner 		break;
3154b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
315569a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3156b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_SECONDARY)
3157b411b363SPhilipp Reisner 			rv = hg;
3158b30ab791SAndreas Gruenbacher 		if (hg == 1  && device->state.role == R_PRIMARY)
3159b411b363SPhilipp Reisner 			rv = hg;
3160b411b363SPhilipp Reisner 		break;
3161b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
316269a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3163b411b363SPhilipp Reisner 		break;
3164b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
3165b30ab791SAndreas Gruenbacher 		return device->state.role == R_PRIMARY ? 1 : -1;
3166b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
316769a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3168b30ab791SAndreas Gruenbacher 		if (hg == -1 && device->state.role == R_PRIMARY) {
3169bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3170bb437946SAndreas Gruenbacher 
3171b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3172b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3173b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3174b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3175bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3176b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3177b411b363SPhilipp Reisner 			} else {
3178d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3179b411b363SPhilipp Reisner 				rv = hg;
3180b411b363SPhilipp Reisner 			}
3181b411b363SPhilipp Reisner 		} else
3182b411b363SPhilipp Reisner 			rv = hg;
3183b411b363SPhilipp Reisner 	}
3184b411b363SPhilipp Reisner 
3185b411b363SPhilipp Reisner 	return rv;
3186b411b363SPhilipp Reisner }
3187b411b363SPhilipp Reisner 
318869a22773SAndreas Gruenbacher /**
318969a22773SAndreas Gruenbacher  * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
319069a22773SAndreas Gruenbacher  */
319169a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3192b411b363SPhilipp Reisner {
319369a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
31946184ea21SAndreas Gruenbacher 	int hg, rv = -100;
319544ed167dSPhilipp Reisner 	enum drbd_after_sb_p after_sb_2p;
3196b411b363SPhilipp Reisner 
319744ed167dSPhilipp Reisner 	rcu_read_lock();
319869a22773SAndreas Gruenbacher 	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
319944ed167dSPhilipp Reisner 	rcu_read_unlock();
320044ed167dSPhilipp Reisner 	switch (after_sb_2p) {
3201b411b363SPhilipp Reisner 	case ASB_DISCARD_YOUNGER_PRI:
3202b411b363SPhilipp Reisner 	case ASB_DISCARD_OLDER_PRI:
3203b411b363SPhilipp Reisner 	case ASB_DISCARD_LEAST_CHG:
3204b411b363SPhilipp Reisner 	case ASB_DISCARD_LOCAL:
3205b411b363SPhilipp Reisner 	case ASB_DISCARD_REMOTE:
3206b411b363SPhilipp Reisner 	case ASB_CONSENSUS:
3207b411b363SPhilipp Reisner 	case ASB_DISCARD_SECONDARY:
320844ed167dSPhilipp Reisner 	case ASB_DISCARD_ZERO_CHG:
3209d0180171SAndreas Gruenbacher 		drbd_err(device, "Configuration error.\n");
3210b411b363SPhilipp Reisner 		break;
3211b411b363SPhilipp Reisner 	case ASB_VIOLENTLY:
321269a22773SAndreas Gruenbacher 		rv = drbd_asb_recover_0p(peer_device);
3213b411b363SPhilipp Reisner 		break;
3214b411b363SPhilipp Reisner 	case ASB_DISCONNECT:
3215b411b363SPhilipp Reisner 		break;
3216b411b363SPhilipp Reisner 	case ASB_CALL_HELPER:
321769a22773SAndreas Gruenbacher 		hg = drbd_asb_recover_0p(peer_device);
3218b411b363SPhilipp Reisner 		if (hg == -1) {
3219bb437946SAndreas Gruenbacher 			enum drbd_state_rv rv2;
3220bb437946SAndreas Gruenbacher 
3221b411b363SPhilipp Reisner 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3222b411b363SPhilipp Reisner 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
3223b411b363SPhilipp Reisner 			  * we do not need to wait for the after state change work either. */
3224b30ab791SAndreas Gruenbacher 			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3225bb437946SAndreas Gruenbacher 			if (rv2 != SS_SUCCESS) {
3226b30ab791SAndreas Gruenbacher 				drbd_khelper(device, "pri-lost-after-sb");
3227b411b363SPhilipp Reisner 			} else {
3228d0180171SAndreas Gruenbacher 				drbd_warn(device, "Successfully gave up primary role.\n");
3229b411b363SPhilipp Reisner 				rv = hg;
3230b411b363SPhilipp Reisner 			}
3231b411b363SPhilipp Reisner 		} else
3232b411b363SPhilipp Reisner 			rv = hg;
3233b411b363SPhilipp Reisner 	}
3234b411b363SPhilipp Reisner 
3235b411b363SPhilipp Reisner 	return rv;
3236b411b363SPhilipp Reisner }
3237b411b363SPhilipp Reisner 
3238b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3239b411b363SPhilipp Reisner 			   u64 bits, u64 flags)
3240b411b363SPhilipp Reisner {
3241b411b363SPhilipp Reisner 	if (!uuid) {
3242d0180171SAndreas Gruenbacher 		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3243b411b363SPhilipp Reisner 		return;
3244b411b363SPhilipp Reisner 	}
3245d0180171SAndreas Gruenbacher 	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3246b411b363SPhilipp Reisner 	     text,
3247b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_CURRENT],
3248b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_BITMAP],
3249b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_START],
3250b411b363SPhilipp Reisner 	     (unsigned long long)uuid[UI_HISTORY_END],
3251b411b363SPhilipp Reisner 	     (unsigned long long)bits,
3252b411b363SPhilipp Reisner 	     (unsigned long long)flags);
3253b411b363SPhilipp Reisner }
3254b411b363SPhilipp Reisner 
3255b411b363SPhilipp Reisner /*
3256b411b363SPhilipp Reisner   100	after split brain try auto recover
3257b411b363SPhilipp Reisner     2	C_SYNC_SOURCE set BitMap
3258b411b363SPhilipp Reisner     1	C_SYNC_SOURCE use BitMap
3259b411b363SPhilipp Reisner     0	no Sync
3260b411b363SPhilipp Reisner    -1	C_SYNC_TARGET use BitMap
3261b411b363SPhilipp Reisner    -2	C_SYNC_TARGET set BitMap
3262b411b363SPhilipp Reisner  -100	after split brain, disconnect
3263b411b363SPhilipp Reisner -1000	unrelated data
32644a23f264SPhilipp Reisner -1091   requires proto 91
32654a23f264SPhilipp Reisner -1096   requires proto 96
3266b411b363SPhilipp Reisner  */
3267f2d3d75bSLars Ellenberg 
/* Compare our on-disk UUID set against the peer's (device->p_uuid) and
 * decide who syncs from whom.  On return, *rule_nr identifies which of
 * the numbered rules below made the decision (for logging).  The return
 * value encoding is documented in the comment block above this function.
 * Caller must hold a local disk reference (__must_hold(local)) and the
 * md.uuid_lock (see drbd_sync_handshake). */
static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	/* NOTE(review): connection is NULL when there is no peer_device, yet it
	 * is dereferenced unconditionally below — presumably a peer_device
	 * always exists on this path; confirm against callers. */
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* The low bit of a UUID is used as a flag elsewhere, so mask it
	 * out for all equality comparisons in this function. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides freshly created -> nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh/blank -> full sync, we are target. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh/blank -> full sync, we are source. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* Identical current UUIDs: we were in sync when the
		 * connection was lost.  Distinguish crash/restart cases. */
		int rct, dc; /* roles at crash time */

		/* We still carry a bitmap UUID but the peer does not:
		 * we were SyncSource and missed the "resync finished"
		 * event (rules 34/36). */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				/* Retire our stale bitmap UUID into the history
				 * to match what the peer already recorded. */
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Mirror case: the peer still carries a bitmap UUID, we do
		 * not -> we were SyncTarget (rules 35/37).  Fix up our copy
		 * of the peer's UUIDs to match reality. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		/* Neither has the "crashed primary" flag set,
		 * only a replication link hickup. */
		if (rct == 0)
			return 0;

		/* Current UUID equal and no bitmap uuid; does not necessarily
		 * mean this was a "simultaneous hard crash", maybe IO was
		 * frozen, so no UUID-bump happened.
		 * This is a protocol change, overload DRBD_FF_WSAME as flag
		 * for "new-enough" peer DRBD version. */
		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
			*rule_nr = 41;
			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
				/* Encoded "peer too old" answer: decoded as
				 * proto = hg & 0xff, features = (hg >> 8) & 0xff
				 * in drbd_sync_handshake(). */
				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
			}
			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
				/* At least one has the "crashed primary" bit set,
				 * both are primary now, but neither has rotated its UUIDs?
				 * "Can not happen." */
				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
				return -100;
			}
			/* Exactly one side is primary: that side wins. */
			if (device->state.role == R_PRIMARY)
				return 1;
			return -1;
		}

		/* Both are secondary.
		 * Really looks like recovery from simultaneous hard crash.
		 * Check which had been primary before, and arbitrate. */
		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary at crash time: break the tie
			 * with the RESOLVE_CONFLICTS connection flag. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID equals the peer's bitmap UUID ->
	 * the peer was syncing towards us; we become target. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current UUID is in the peer's history; check whether
	 * the peer merely lost the last P_SYNC_UUID packet. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID appears somewhere in the peer's
	 * history -> peer has newer data; full sync, we are target. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rule 70: our bitmap UUID equals the peer's current UUID ->
	 * we were syncing towards the peer; we become source. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* Rule 71: mirror of rule 51 — we may have lost the last
	 * P_SYNC_UUID ourselves; undo our own UUID rotation. */
	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Rule 80: the peer's current UUID appears in our history ->
	 * we have newer data; full sync, we are source. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: equal non-zero bitmap UUIDs -> split brain, but the
	 * bitmaps still allow an automatic recovery attempt. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: histories merely overlap -> split brain, disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the two UUID sets at all. */
	return -1000;
}
3482b411b363SPhilipp Reisner 
/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.

   Runs drbd_uuid_compare() under the md.uuid_lock, then applies the
   configured split-brain / conflict policies to the result "hg":
   hg > 0 -> we become sync source, hg < 0 -> sync target,
   |hg| >= 2 -> full sync, |hg| == 100 -> split brain, hg == 0 -> in sync.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	mydisk = device->state.disk;
	/* While still negotiating, judge by the disk state we are about
	 * to take, not the transient D_NEGOTIATING. */
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* The UUID comparison and its dump must see a consistent UUID set. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* Encoded "peer too old" answer from rule 41:
	 * -(0x10000 | proto | (feature_flags << 8)). */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* -1091 / -1096: peer's protocol version is too old for this case. */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side is inconsistent, disk states override the
	 * UUID verdict: the consistent side must be the source.  Keep the
	 * "full sync" property when the UUID result demanded one. */
	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	/* Notify userspace of the detected split brain before trying
	 * any automatic recovery. */
	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	/* Copy out the policy knobs so we can drop the RCU read lock. */
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* Automatic split-brain recovery per the configured
	 * after-sb-{0,1,2}pri policy, keyed on how many primaries exist. */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: honor an explicit discard-my-data setting
	 * (peer's flag is bit 0 of p_uuid[UI_FLAGS]) if only one side set it. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	/* Sanity: an inconsistent disk cannot serve as sync source. */
	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* We are primary with good data but would have to become sync
	 * target: resolve per the rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run mode: report what would happen, then refuse to connect. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* Full sync requested: set every bit in the bitmap up front. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3651b411b363SPhilipp Reisner 
3652f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3653b411b363SPhilipp Reisner {
3654b411b363SPhilipp Reisner 	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3655f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_REMOTE)
3656f179d76dSPhilipp Reisner 		return ASB_DISCARD_LOCAL;
3657b411b363SPhilipp Reisner 
3658b411b363SPhilipp Reisner 	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3659f179d76dSPhilipp Reisner 	if (peer == ASB_DISCARD_LOCAL)
3660f179d76dSPhilipp Reisner 		return ASB_DISCARD_REMOTE;
3661b411b363SPhilipp Reisner 
3662b411b363SPhilipp Reisner 	/* everything else is valid if they are equal on both sides. */
3663f179d76dSPhilipp Reisner 	return peer;
3664b411b363SPhilipp Reisner }
3665b411b363SPhilipp Reisner 
/* Handle an incoming P_PROTOCOL / P_PROTOCOL_UPDATE packet: check the
 * peer's connection settings for compatibility with our net_conf,
 * set up the peer's data-integrity digest if one is configured, and
 * publish an updated net_conf via RCU.
 * Returns 0 on success or a negative error code; on any incompatibility
 * the connection is forced to C_DISCONNECTING. */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Decode the peer's settings from network byte order. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name trails the packet. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* Force NUL termination; the wire data need not be terminated. */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* P_PROTOCOL_UPDATE skips the compatibility checks: the peer is
	 * announcing a runtime change, not negotiating a fresh connect. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* Compare the peer's settings against our current net_conf;
		 * the after-sb policies are mirrored via convert_after_sb(). */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* Both sides setting discard-my-data would discard everything. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Digest scratch buffers: incoming digest and our verification. */
		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Swap in the updated configuration.  data.mutex serializes against
	 * the sender; conf_update protects net_conf replacement. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Replace the previous integrity state; frees are no-ops when the
	 * old pointers are NULL. */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Wait for all RCU readers of the old net_conf before freeing it. */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* Undo partial setup; all of these handle NULL gracefully. */
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3816b411b363SPhilipp Reisner 
3817b411b363SPhilipp Reisner /* helper function
3818b411b363SPhilipp Reisner  * input: alg name, feature name
3819b411b363SPhilipp Reisner  * return: NULL (alg name was "")
3820b411b363SPhilipp Reisner  *         ERR_PTR(error) if something goes wrong
3821b411b363SPhilipp Reisner  *         or the crypto hash ptr, if it worked out ok. */
38223d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
38233d0e6375SKees Cook 		const struct drbd_device *device,
3824b411b363SPhilipp Reisner 		const char *alg, const char *name)
3825b411b363SPhilipp Reisner {
38263d0e6375SKees Cook 	struct crypto_shash *tfm;
3827b411b363SPhilipp Reisner 
3828b411b363SPhilipp Reisner 	if (!alg[0])
3829b411b363SPhilipp Reisner 		return NULL;
3830b411b363SPhilipp Reisner 
38313d0e6375SKees Cook 	tfm = crypto_alloc_shash(alg, 0, 0);
3832b411b363SPhilipp Reisner 	if (IS_ERR(tfm)) {
3833d0180171SAndreas Gruenbacher 		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3834b411b363SPhilipp Reisner 			alg, name, PTR_ERR(tfm));
3835b411b363SPhilipp Reisner 		return tfm;
3836b411b363SPhilipp Reisner 	}
3837b411b363SPhilipp Reisner 	return tfm;
3838b411b363SPhilipp Reisner }
3839b411b363SPhilipp Reisner 
3840bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3841b411b363SPhilipp Reisner {
3842bde89a9eSAndreas Gruenbacher 	void *buffer = connection->data.rbuf;
38434a76b161SAndreas Gruenbacher 	int size = pi->size;
38444a76b161SAndreas Gruenbacher 
38454a76b161SAndreas Gruenbacher 	while (size) {
38464a76b161SAndreas Gruenbacher 		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3847bde89a9eSAndreas Gruenbacher 		s = drbd_recv(connection, buffer, s);
38484a76b161SAndreas Gruenbacher 		if (s <= 0) {
38494a76b161SAndreas Gruenbacher 			if (s < 0)
38504a76b161SAndreas Gruenbacher 				return s;
38514a76b161SAndreas Gruenbacher 			break;
38524a76b161SAndreas Gruenbacher 		}
38534a76b161SAndreas Gruenbacher 		size -= s;
38544a76b161SAndreas Gruenbacher 	}
38554a76b161SAndreas Gruenbacher 	if (size)
38564a76b161SAndreas Gruenbacher 		return -EIO;
38574a76b161SAndreas Gruenbacher 	return 0;
38584a76b161SAndreas Gruenbacher }
38594a76b161SAndreas Gruenbacher 
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Still consume the payload, so the receive stream stays in sync
	 * with the packet framing for the next packet. */
	return ignore_remaining_packet(connection, pi);
}
38774a76b161SAndreas Gruenbacher 
3878bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38794a76b161SAndreas Gruenbacher {
38809f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
3881b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
3882e658983aSAndreas Gruenbacher 	struct p_rs_param_95 *p;
3883b411b363SPhilipp Reisner 	unsigned int header_size, data_size, exp_max_sz;
38843d0e6375SKees Cook 	struct crypto_shash *verify_tfm = NULL;
38853d0e6375SKees Cook 	struct crypto_shash *csums_tfm = NULL;
38862ec91e0eSPhilipp Reisner 	struct net_conf *old_net_conf, *new_net_conf = NULL;
3887813472ceSPhilipp Reisner 	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3888bde89a9eSAndreas Gruenbacher 	const int apv = connection->agreed_pro_version;
3889813472ceSPhilipp Reisner 	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
3890778f271dSPhilipp Reisner 	int fifo_size = 0;
389182bc0194SAndreas Gruenbacher 	int err;
3892b411b363SPhilipp Reisner 
38939f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
38949f4fe9adSAndreas Gruenbacher 	if (!peer_device)
3895bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
38969f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
3897b411b363SPhilipp Reisner 
3898b411b363SPhilipp Reisner 	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
3899b411b363SPhilipp Reisner 		    : apv == 88 ? sizeof(struct p_rs_param)
3900b411b363SPhilipp Reisner 					+ SHARED_SECRET_MAX
39018e26f9ccSPhilipp Reisner 		    : apv <= 94 ? sizeof(struct p_rs_param_89)
39028e26f9ccSPhilipp Reisner 		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3903b411b363SPhilipp Reisner 
3904e2857216SAndreas Gruenbacher 	if (pi->size > exp_max_sz) {
3905d0180171SAndreas Gruenbacher 		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3906e2857216SAndreas Gruenbacher 		    pi->size, exp_max_sz);
390782bc0194SAndreas Gruenbacher 		return -EIO;
3908b411b363SPhilipp Reisner 	}
3909b411b363SPhilipp Reisner 
3910b411b363SPhilipp Reisner 	if (apv <= 88) {
3911e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param);
3912e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39138e26f9ccSPhilipp Reisner 	} else if (apv <= 94) {
3914e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_89);
3915e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39160b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
39178e26f9ccSPhilipp Reisner 	} else {
3918e658983aSAndreas Gruenbacher 		header_size = sizeof(struct p_rs_param_95);
3919e2857216SAndreas Gruenbacher 		data_size = pi->size - header_size;
39200b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, data_size == 0);
3921b411b363SPhilipp Reisner 	}
3922b411b363SPhilipp Reisner 
3923b411b363SPhilipp Reisner 	/* initialize verify_alg and csums_alg */
3924e658983aSAndreas Gruenbacher 	p = pi->data;
3925b411b363SPhilipp Reisner 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3926b411b363SPhilipp Reisner 
39279f4fe9adSAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, header_size);
392882bc0194SAndreas Gruenbacher 	if (err)
392982bc0194SAndreas Gruenbacher 		return err;
3930b411b363SPhilipp Reisner 
39310500813fSAndreas Gruenbacher 	mutex_lock(&connection->resource->conf_update);
39329f4fe9adSAndreas Gruenbacher 	old_net_conf = peer_device->connection->net_conf;
3933b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
3934daeda1ccSPhilipp Reisner 		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3935daeda1ccSPhilipp Reisner 		if (!new_disk_conf) {
3936b30ab791SAndreas Gruenbacher 			put_ldev(device);
39370500813fSAndreas Gruenbacher 			mutex_unlock(&connection->resource->conf_update);
3938d0180171SAndreas Gruenbacher 			drbd_err(device, "Allocation of new disk_conf failed\n");
3939daeda1ccSPhilipp Reisner 			return -ENOMEM;
3940f399002eSLars Ellenberg 		}
3941b411b363SPhilipp Reisner 
3942b30ab791SAndreas Gruenbacher 		old_disk_conf = device->ldev->disk_conf;
3943daeda1ccSPhilipp Reisner 		*new_disk_conf = *old_disk_conf;
3944daeda1ccSPhilipp Reisner 
39456394b935SAndreas Gruenbacher 		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3946813472ceSPhilipp Reisner 	}
3947b411b363SPhilipp Reisner 
3948b411b363SPhilipp Reisner 	if (apv >= 88) {
3949b411b363SPhilipp Reisner 		if (apv == 88) {
39505de73827SPhilipp Reisner 			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3951d0180171SAndreas Gruenbacher 				drbd_err(device, "verify-alg of wrong size, "
39525de73827SPhilipp Reisner 					"peer wants %u, accepting only up to %u byte\n",
3953b411b363SPhilipp Reisner 					data_size, SHARED_SECRET_MAX);
3954813472ceSPhilipp Reisner 				err = -EIO;
3955813472ceSPhilipp Reisner 				goto reconnect;
3956b411b363SPhilipp Reisner 			}
3957b411b363SPhilipp Reisner 
39589f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3959813472ceSPhilipp Reisner 			if (err)
3960813472ceSPhilipp Reisner 				goto reconnect;
3961b411b363SPhilipp Reisner 			/* we expect NUL terminated string */
3962b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39630b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3964b411b363SPhilipp Reisner 			p->verify_alg[data_size-1] = 0;
3965b411b363SPhilipp Reisner 
3966b411b363SPhilipp Reisner 		} else /* apv >= 89 */ {
3967b411b363SPhilipp Reisner 			/* we still expect NUL terminated strings */
3968b411b363SPhilipp Reisner 			/* but just in case someone tries to be evil */
39690b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39700b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3971b411b363SPhilipp Reisner 			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3972b411b363SPhilipp Reisner 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3973b411b363SPhilipp Reisner 		}
3974b411b363SPhilipp Reisner 
39752ec91e0eSPhilipp Reisner 		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3976b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3977d0180171SAndreas Gruenbacher 				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39782ec91e0eSPhilipp Reisner 				    old_net_conf->verify_alg, p->verify_alg);
3979b411b363SPhilipp Reisner 				goto disconnect;
3980b411b363SPhilipp Reisner 			}
3981b30ab791SAndreas Gruenbacher 			verify_tfm = drbd_crypto_alloc_digest_safe(device,
3982b411b363SPhilipp Reisner 					p->verify_alg, "verify-alg");
3983b411b363SPhilipp Reisner 			if (IS_ERR(verify_tfm)) {
3984b411b363SPhilipp Reisner 				verify_tfm = NULL;
3985b411b363SPhilipp Reisner 				goto disconnect;
3986b411b363SPhilipp Reisner 			}
3987b411b363SPhilipp Reisner 		}
3988b411b363SPhilipp Reisner 
39892ec91e0eSPhilipp Reisner 		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3990b30ab791SAndreas Gruenbacher 			if (device->state.conn == C_WF_REPORT_PARAMS) {
3991d0180171SAndreas Gruenbacher 				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39922ec91e0eSPhilipp Reisner 				    old_net_conf->csums_alg, p->csums_alg);
3993b411b363SPhilipp Reisner 				goto disconnect;
3994b411b363SPhilipp Reisner 			}
3995b30ab791SAndreas Gruenbacher 			csums_tfm = drbd_crypto_alloc_digest_safe(device,
3996b411b363SPhilipp Reisner 					p->csums_alg, "csums-alg");
3997b411b363SPhilipp Reisner 			if (IS_ERR(csums_tfm)) {
3998b411b363SPhilipp Reisner 				csums_tfm = NULL;
3999b411b363SPhilipp Reisner 				goto disconnect;
4000b411b363SPhilipp Reisner 			}
4001b411b363SPhilipp Reisner 		}
4002b411b363SPhilipp Reisner 
4003813472ceSPhilipp Reisner 		if (apv > 94 && new_disk_conf) {
4004daeda1ccSPhilipp Reisner 			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
4005daeda1ccSPhilipp Reisner 			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
4006daeda1ccSPhilipp Reisner 			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
4007daeda1ccSPhilipp Reisner 			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
4008778f271dSPhilipp Reisner 
4009daeda1ccSPhilipp Reisner 			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
4010b30ab791SAndreas Gruenbacher 			if (fifo_size != device->rs_plan_s->size) {
4011813472ceSPhilipp Reisner 				new_plan = fifo_alloc(fifo_size);
4012813472ceSPhilipp Reisner 				if (!new_plan) {
4013d0180171SAndreas Gruenbacher 					drbd_err(device, "kmalloc of fifo_buffer failed");
4014b30ab791SAndreas Gruenbacher 					put_ldev(device);
4015778f271dSPhilipp Reisner 					goto disconnect;
4016778f271dSPhilipp Reisner 				}
4017778f271dSPhilipp Reisner 			}
40188e26f9ccSPhilipp Reisner 		}
4019b411b363SPhilipp Reisner 
402091fd4dadSPhilipp Reisner 		if (verify_tfm || csums_tfm) {
40212ec91e0eSPhilipp Reisner 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
40222ec91e0eSPhilipp Reisner 			if (!new_net_conf) {
4023d0180171SAndreas Gruenbacher 				drbd_err(device, "Allocation of new net_conf failed\n");
402491fd4dadSPhilipp Reisner 				goto disconnect;
402591fd4dadSPhilipp Reisner 			}
402691fd4dadSPhilipp Reisner 
40272ec91e0eSPhilipp Reisner 			*new_net_conf = *old_net_conf;
402891fd4dadSPhilipp Reisner 
4029b411b363SPhilipp Reisner 			if (verify_tfm) {
40302ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->verify_alg, p->verify_alg);
40312ec91e0eSPhilipp Reisner 				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
40323d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->verify_tfm);
40339f4fe9adSAndreas Gruenbacher 				peer_device->connection->verify_tfm = verify_tfm;
4034d0180171SAndreas Gruenbacher 				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
4035b411b363SPhilipp Reisner 			}
4036b411b363SPhilipp Reisner 			if (csums_tfm) {
40372ec91e0eSPhilipp Reisner 				strcpy(new_net_conf->csums_alg, p->csums_alg);
40382ec91e0eSPhilipp Reisner 				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
40393d0e6375SKees Cook 				crypto_free_shash(peer_device->connection->csums_tfm);
40409f4fe9adSAndreas Gruenbacher 				peer_device->connection->csums_tfm = csums_tfm;
4041d0180171SAndreas Gruenbacher 				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4042b411b363SPhilipp Reisner 			}
4043bde89a9eSAndreas Gruenbacher 			rcu_assign_pointer(connection->net_conf, new_net_conf);
4044778f271dSPhilipp Reisner 		}
4045b411b363SPhilipp Reisner 	}
4046b411b363SPhilipp Reisner 
4047813472ceSPhilipp Reisner 	if (new_disk_conf) {
4048b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4049b30ab791SAndreas Gruenbacher 		put_ldev(device);
4050b411b363SPhilipp Reisner 	}
4051813472ceSPhilipp Reisner 
4052813472ceSPhilipp Reisner 	if (new_plan) {
4053b30ab791SAndreas Gruenbacher 		old_plan = device->rs_plan_s;
4054b30ab791SAndreas Gruenbacher 		rcu_assign_pointer(device->rs_plan_s, new_plan);
4055813472ceSPhilipp Reisner 	}
4056daeda1ccSPhilipp Reisner 
40570500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4058daeda1ccSPhilipp Reisner 	synchronize_rcu();
4059daeda1ccSPhilipp Reisner 	if (new_net_conf)
4060daeda1ccSPhilipp Reisner 		kfree(old_net_conf);
4061daeda1ccSPhilipp Reisner 	kfree(old_disk_conf);
4062813472ceSPhilipp Reisner 	kfree(old_plan);
4063daeda1ccSPhilipp Reisner 
406482bc0194SAndreas Gruenbacher 	return 0;
4065b411b363SPhilipp Reisner 
4066813472ceSPhilipp Reisner reconnect:
4067813472ceSPhilipp Reisner 	if (new_disk_conf) {
4068b30ab791SAndreas Gruenbacher 		put_ldev(device);
4069813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4070813472ceSPhilipp Reisner 	}
40710500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4072813472ceSPhilipp Reisner 	return -EIO;
4073813472ceSPhilipp Reisner 
4074b411b363SPhilipp Reisner disconnect:
4075813472ceSPhilipp Reisner 	kfree(new_plan);
4076813472ceSPhilipp Reisner 	if (new_disk_conf) {
4077b30ab791SAndreas Gruenbacher 		put_ldev(device);
4078813472ceSPhilipp Reisner 		kfree(new_disk_conf);
4079813472ceSPhilipp Reisner 	}
40800500813fSAndreas Gruenbacher 	mutex_unlock(&connection->resource->conf_update);
4081b411b363SPhilipp Reisner 	/* just for completeness: actually not needed,
4082b411b363SPhilipp Reisner 	 * as this is not reached if csums_tfm was ok. */
40833d0e6375SKees Cook 	crypto_free_shash(csums_tfm);
4084b411b363SPhilipp Reisner 	/* but free the verify_tfm again, if csums_tfm did not work out */
40853d0e6375SKees Cook 	crypto_free_shash(verify_tfm);
40869f4fe9adSAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
408782bc0194SAndreas Gruenbacher 	return -EIO;
4088b411b363SPhilipp Reisner }
4089b411b363SPhilipp Reisner 
4090b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
4091b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device,
4092b411b363SPhilipp Reisner 	const char *s, sector_t a, sector_t b)
4093b411b363SPhilipp Reisner {
4094b411b363SPhilipp Reisner 	sector_t d;
4095b411b363SPhilipp Reisner 	if (a == 0 || b == 0)
4096b411b363SPhilipp Reisner 		return;
4097b411b363SPhilipp Reisner 	d = (a > b) ? (a - b) : (b - a);
4098b411b363SPhilipp Reisner 	if (d > (a>>3) || d > (b>>3))
4099d0180171SAndreas Gruenbacher 		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4100b411b363SPhilipp Reisner 		     (unsigned long long)a, (unsigned long long)b);
4101b411b363SPhilipp Reisner }
4102b411b363SPhilipp Reisner 
/* Handle a P_SIZES packet: reconcile the peer's backing-device size,
 * user-requested size, and current size with our own, possibly resize
 * the device, and trigger a resync after an online grow if needed.
 *
 * Returns 0 on success, -EIO/-ENOMEM on failure; on an unacceptable size
 * mismatch the connection is forced into C_DISCONNECTING.
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* Queue limits are only on the wire if the WSAME feature was agreed. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = drbd_get_capacity(device->this_bdev);

	/* Peer's backing disk size, user-configured size and current size,
	 * all in sectors, converted from network byte order. */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		/* disk_conf is RCU-protected; snapshot the configured size. */
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		/* Persist the (possibly clamped) user size: build a new
		 * disk_conf and publish it under conf_update via RCU. */
		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			/* Wait out RCU readers of the old conf before freeing it. */
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		/* NOTE: this local new_size intentionally shadows the outer
		 * one, which is only used in the get_ldev() branch above. */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore, peer does not know nothing. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* Remember the backing device's size so we can detect local resizes. */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4275b411b363SPhilipp Reisner 
4276bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4277b411b363SPhilipp Reisner {
42789f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4279b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4280e658983aSAndreas Gruenbacher 	struct p_uuids *p = pi->data;
4281b411b363SPhilipp Reisner 	u64 *p_uuid;
428262b0da3aSLars Ellenberg 	int i, updated_uuids = 0;
4283b411b363SPhilipp Reisner 
42849f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
42859f4fe9adSAndreas Gruenbacher 	if (!peer_device)
4286bde89a9eSAndreas Gruenbacher 		return config_unknown_volume(connection, pi);
42879f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
42884a76b161SAndreas Gruenbacher 
4289365cf663SRoland Kammerer 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
4290063eacf8SJing Wang 	if (!p_uuid) {
4291d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc of p_uuid failed\n");
4292063eacf8SJing Wang 		return false;
4293063eacf8SJing Wang 	}
4294b411b363SPhilipp Reisner 
4295b411b363SPhilipp Reisner 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4296b411b363SPhilipp Reisner 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
4297b411b363SPhilipp Reisner 
4298b30ab791SAndreas Gruenbacher 	kfree(device->p_uuid);
4299b30ab791SAndreas Gruenbacher 	device->p_uuid = p_uuid;
4300b411b363SPhilipp Reisner 
4301b17b5960SLars Ellenberg 	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4302b30ab791SAndreas Gruenbacher 	    device->state.disk < D_INCONSISTENT &&
4303b30ab791SAndreas Gruenbacher 	    device->state.role == R_PRIMARY &&
4304b30ab791SAndreas Gruenbacher 	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4305d0180171SAndreas Gruenbacher 		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4306b30ab791SAndreas Gruenbacher 		    (unsigned long long)device->ed_uuid);
43079f4fe9adSAndreas Gruenbacher 		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
430882bc0194SAndreas Gruenbacher 		return -EIO;
4309b411b363SPhilipp Reisner 	}
4310b411b363SPhilipp Reisner 
4311b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
4312b411b363SPhilipp Reisner 		int skip_initial_sync =
4313b30ab791SAndreas Gruenbacher 			device->state.conn == C_CONNECTED &&
43149f4fe9adSAndreas Gruenbacher 			peer_device->connection->agreed_pro_version >= 90 &&
4315b30ab791SAndreas Gruenbacher 			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4316b411b363SPhilipp Reisner 			(p_uuid[UI_FLAGS] & 8);
4317b411b363SPhilipp Reisner 		if (skip_initial_sync) {
4318d0180171SAndreas Gruenbacher 			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4319b30ab791SAndreas Gruenbacher 			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
432020ceb2b2SLars Ellenberg 					"clear_n_write from receive_uuids",
432120ceb2b2SLars Ellenberg 					BM_LOCKED_TEST_ALLOWED);
4322b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4323b30ab791SAndreas Gruenbacher 			_drbd_uuid_set(device, UI_BITMAP, 0);
4324b30ab791SAndreas Gruenbacher 			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4325b411b363SPhilipp Reisner 					CS_VERBOSE, NULL);
4326b30ab791SAndreas Gruenbacher 			drbd_md_sync(device);
432762b0da3aSLars Ellenberg 			updated_uuids = 1;
4328b411b363SPhilipp Reisner 		}
4329b30ab791SAndreas Gruenbacher 		put_ldev(device);
4330b30ab791SAndreas Gruenbacher 	} else if (device->state.disk < D_INCONSISTENT &&
4331b30ab791SAndreas Gruenbacher 		   device->state.role == R_PRIMARY) {
433218a50fa2SPhilipp Reisner 		/* I am a diskless primary, the peer just created a new current UUID
433318a50fa2SPhilipp Reisner 		   for me. */
4334b30ab791SAndreas Gruenbacher 		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4335b411b363SPhilipp Reisner 	}
4336b411b363SPhilipp Reisner 
4337b411b363SPhilipp Reisner 	/* Before we test for the disk state, we should wait until an eventually
4338b411b363SPhilipp Reisner 	   ongoing cluster wide state change is finished. That is important if
4339b411b363SPhilipp Reisner 	   we are primary and are detaching from our disk. We need to see the
4340b411b363SPhilipp Reisner 	   new disk state... */
4341b30ab791SAndreas Gruenbacher 	mutex_lock(device->state_mutex);
4342b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
4343b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4344b30ab791SAndreas Gruenbacher 		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
434562b0da3aSLars Ellenberg 
434662b0da3aSLars Ellenberg 	if (updated_uuids)
4347b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "receiver updated UUIDs to");
4348b411b363SPhilipp Reisner 
434982bc0194SAndreas Gruenbacher 	return 0;
4350b411b363SPhilipp Reisner }
4351b411b363SPhilipp Reisner 
4352b411b363SPhilipp Reisner /**
4353b411b363SPhilipp Reisner  * convert_state() - Converts the peer's view of the cluster state to our point of view
4354b411b363SPhilipp Reisner  * @ps:		The state as seen by the peer.
4355b411b363SPhilipp Reisner  */
4356b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4357b411b363SPhilipp Reisner {
4358b411b363SPhilipp Reisner 	union drbd_state ms;
4359b411b363SPhilipp Reisner 
4360b411b363SPhilipp Reisner 	static enum drbd_conns c_tab[] = {
4361369bea63SPhilipp Reisner 		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4362b411b363SPhilipp Reisner 		[C_CONNECTED] = C_CONNECTED,
4363b411b363SPhilipp Reisner 
4364b411b363SPhilipp Reisner 		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4365b411b363SPhilipp Reisner 		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4366b411b363SPhilipp Reisner 		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4367b411b363SPhilipp Reisner 		[C_VERIFY_S]       = C_VERIFY_T,
4368b411b363SPhilipp Reisner 		[C_MASK]   = C_MASK,
4369b411b363SPhilipp Reisner 	};
4370b411b363SPhilipp Reisner 
4371b411b363SPhilipp Reisner 	ms.i = ps.i;
4372b411b363SPhilipp Reisner 
4373b411b363SPhilipp Reisner 	ms.conn = c_tab[ps.conn];
4374b411b363SPhilipp Reisner 	ms.peer = ps.role;
4375b411b363SPhilipp Reisner 	ms.role = ps.peer;
4376b411b363SPhilipp Reisner 	ms.pdsk = ps.disk;
4377b411b363SPhilipp Reisner 	ms.disk = ps.pdsk;
4378b411b363SPhilipp Reisner 	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4379b411b363SPhilipp Reisner 
4380b411b363SPhilipp Reisner 	return ms;
4381b411b363SPhilipp Reisner }
4382b411b363SPhilipp Reisner 
4383bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4384b411b363SPhilipp Reisner {
43859f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4386b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4387e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4388b411b363SPhilipp Reisner 	union drbd_state mask, val;
4389bf885f8aSAndreas Gruenbacher 	enum drbd_state_rv rv;
4390b411b363SPhilipp Reisner 
43919f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
43929f4fe9adSAndreas Gruenbacher 	if (!peer_device)
43934a76b161SAndreas Gruenbacher 		return -EIO;
43949f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
43954a76b161SAndreas Gruenbacher 
4396b411b363SPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4397b411b363SPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4398b411b363SPhilipp Reisner 
43999f4fe9adSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4400b30ab791SAndreas Gruenbacher 	    mutex_is_locked(device->state_mutex)) {
440169a22773SAndreas Gruenbacher 		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
440282bc0194SAndreas Gruenbacher 		return 0;
4403b411b363SPhilipp Reisner 	}
4404b411b363SPhilipp Reisner 
4405b411b363SPhilipp Reisner 	mask = convert_state(mask);
4406b411b363SPhilipp Reisner 	val = convert_state(val);
4407b411b363SPhilipp Reisner 
4408b30ab791SAndreas Gruenbacher 	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
440969a22773SAndreas Gruenbacher 	drbd_send_sr_reply(peer_device, rv);
4410047cd4a6SPhilipp Reisner 
4411b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
4412b411b363SPhilipp Reisner 
441382bc0194SAndreas Gruenbacher 	return 0;
4414b411b363SPhilipp Reisner }
4415b411b363SPhilipp Reisner 
4416bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4417b411b363SPhilipp Reisner {
4418e658983aSAndreas Gruenbacher 	struct p_req_state *p = pi->data;
4419dfafcc8aSPhilipp Reisner 	union drbd_state mask, val;
4420dfafcc8aSPhilipp Reisner 	enum drbd_state_rv rv;
4421dfafcc8aSPhilipp Reisner 
4422dfafcc8aSPhilipp Reisner 	mask.i = be32_to_cpu(p->mask);
4423dfafcc8aSPhilipp Reisner 	val.i = be32_to_cpu(p->val);
4424dfafcc8aSPhilipp Reisner 
4425bde89a9eSAndreas Gruenbacher 	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4426bde89a9eSAndreas Gruenbacher 	    mutex_is_locked(&connection->cstate_mutex)) {
4427bde89a9eSAndreas Gruenbacher 		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
442882bc0194SAndreas Gruenbacher 		return 0;
4429dfafcc8aSPhilipp Reisner 	}
4430dfafcc8aSPhilipp Reisner 
4431dfafcc8aSPhilipp Reisner 	mask = convert_state(mask);
4432dfafcc8aSPhilipp Reisner 	val = convert_state(val);
4433dfafcc8aSPhilipp Reisner 
4434bde89a9eSAndreas Gruenbacher 	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4435bde89a9eSAndreas Gruenbacher 	conn_send_sr_reply(connection, rv);
4436dfafcc8aSPhilipp Reisner 
443782bc0194SAndreas Gruenbacher 	return 0;
4438dfafcc8aSPhilipp Reisner }
4439dfafcc8aSPhilipp Reisner 
/* Handle a P_STATE packet: the peer reports its current state.
 *
 * Derive our new local state from it — possibly running the resync
 * handshake — and commit it under req_lock, retrying if the local state
 * changed underneath us.  Returns 0 on success, a negative error code
 * when the connection must be torn down.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* The peer is attaching (D_NEGOTIATING); judge its actual disk state
	 * from the UUID flags it sent us earlier. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	/* Snapshot our state; we re-check it under the lock again below and
	 * jump back here if it changed in between. */
	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* Peer throttled itself ahead of us; mirror that as Behind. */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids()).
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
				(peer_state.conn >= C_STARTING_SYNC_S &&
				 peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake() means: no suitable
		 * common state could be found. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Commit phase: re-check under the lock that the state we based our
	 * decisions on is still current; otherwise start over. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporary network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	/* re-read: _drbd_set_state() may have adjusted the state further */
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4636b411b363SPhilipp Reisner 
4637bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4638b411b363SPhilipp Reisner {
46399f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4640b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4641e658983aSAndreas Gruenbacher 	struct p_rs_uuid *p = pi->data;
46424a76b161SAndreas Gruenbacher 
46439f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
46449f4fe9adSAndreas Gruenbacher 	if (!peer_device)
46454a76b161SAndreas Gruenbacher 		return -EIO;
46469f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4647b411b363SPhilipp Reisner 
4648b30ab791SAndreas Gruenbacher 	wait_event(device->misc_wait,
4649b30ab791SAndreas Gruenbacher 		   device->state.conn == C_WF_SYNC_UUID ||
4650b30ab791SAndreas Gruenbacher 		   device->state.conn == C_BEHIND ||
4651b30ab791SAndreas Gruenbacher 		   device->state.conn < C_CONNECTED ||
4652b30ab791SAndreas Gruenbacher 		   device->state.disk < D_NEGOTIATING);
4653b411b363SPhilipp Reisner 
46540b0ba1efSAndreas Gruenbacher 	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
4655b411b363SPhilipp Reisner 
4656b411b363SPhilipp Reisner 	/* Here the _drbd_uuid_ functions are right, current should
4657b411b363SPhilipp Reisner 	   _not_ be rotated into the history */
4658b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_NEGOTIATING)) {
4659b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4660b30ab791SAndreas Gruenbacher 		_drbd_uuid_set(device, UI_BITMAP, 0UL);
4661b411b363SPhilipp Reisner 
4662b30ab791SAndreas Gruenbacher 		drbd_print_uuids(device, "updated sync uuid");
4663b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_TARGET);
4664b411b363SPhilipp Reisner 
4665b30ab791SAndreas Gruenbacher 		put_ldev(device);
4666b411b363SPhilipp Reisner 	} else
4667d0180171SAndreas Gruenbacher 		drbd_err(device, "Ignoring SyncUUID packet!\n");
4668b411b363SPhilipp Reisner 
466982bc0194SAndreas Gruenbacher 	return 0;
4670b411b363SPhilipp Reisner }
4671b411b363SPhilipp Reisner 
46722c46407dSAndreas Gruenbacher /**
46732c46407dSAndreas Gruenbacher  * receive_bitmap_plain
46742c46407dSAndreas Gruenbacher  *
46752c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
46762c46407dSAndreas Gruenbacher  * code upon failure.
46772c46407dSAndreas Gruenbacher  */
46782c46407dSAndreas Gruenbacher static int
467969a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4680e658983aSAndreas Gruenbacher 		     unsigned long *p, struct bm_xfer_ctx *c)
4681b411b363SPhilipp Reisner {
468250d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
468369a22773SAndreas Gruenbacher 				 drbd_header_size(peer_device->connection);
4684e658983aSAndreas Gruenbacher 	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
468550d0b1adSAndreas Gruenbacher 				       c->bm_words - c->word_offset);
4686e658983aSAndreas Gruenbacher 	unsigned int want = num_words * sizeof(*p);
46872c46407dSAndreas Gruenbacher 	int err;
4688b411b363SPhilipp Reisner 
468950d0b1adSAndreas Gruenbacher 	if (want != size) {
469069a22773SAndreas Gruenbacher 		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46912c46407dSAndreas Gruenbacher 		return -EIO;
4692b411b363SPhilipp Reisner 	}
4693b411b363SPhilipp Reisner 	if (want == 0)
46942c46407dSAndreas Gruenbacher 		return 0;
469569a22773SAndreas Gruenbacher 	err = drbd_recv_all(peer_device->connection, p, want);
469682bc0194SAndreas Gruenbacher 	if (err)
46972c46407dSAndreas Gruenbacher 		return err;
4698b411b363SPhilipp Reisner 
469969a22773SAndreas Gruenbacher 	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4700b411b363SPhilipp Reisner 
4701b411b363SPhilipp Reisner 	c->word_offset += num_words;
4702b411b363SPhilipp Reisner 	c->bit_offset = c->word_offset * BITS_PER_LONG;
4703b411b363SPhilipp Reisner 	if (c->bit_offset > c->bm_bits)
4704b411b363SPhilipp Reisner 		c->bit_offset = c->bm_bits;
4705b411b363SPhilipp Reisner 
47062c46407dSAndreas Gruenbacher 	return 1;
4707b411b363SPhilipp Reisner }
4708b411b363SPhilipp Reisner 
4709a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4710a02d1240SAndreas Gruenbacher {
4711a02d1240SAndreas Gruenbacher 	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4712a02d1240SAndreas Gruenbacher }
4713a02d1240SAndreas Gruenbacher 
4714a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4715a02d1240SAndreas Gruenbacher {
4716a02d1240SAndreas Gruenbacher 	return (p->encoding & 0x80) != 0;
4717a02d1240SAndreas Gruenbacher }
4718a02d1240SAndreas Gruenbacher 
4719a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4720a02d1240SAndreas Gruenbacher {
4721a02d1240SAndreas Gruenbacher 	return (p->encoding >> 4) & 0x7;
4722a02d1240SAndreas Gruenbacher }
4723a02d1240SAndreas Gruenbacher 
/**
 * recv_bm_rle_bits
 *
 * Decode one run-length-encoded (VLI, see drbd_vli.h) bitmap payload from
 * @p and set the encoded bit runs in the local bitmap, advancing the
 * transfer context @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	/* up to 64 not-yet-consumed bits from the input stream */
	u64 look_ahead;
	/* current run length, decoded from the VLI code */
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	/* runs alternate between clear and set; the start value is encoded */
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead buffer */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* a VLI code must never be longer than what we have buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead buffer back up to 64 bits */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
4792b411b363SPhilipp Reisner 
47932c46407dSAndreas Gruenbacher /**
47942c46407dSAndreas Gruenbacher  * decode_bitmap_c
47952c46407dSAndreas Gruenbacher  *
47962c46407dSAndreas Gruenbacher  * Return 0 when done, 1 when another iteration is needed, and a negative error
47972c46407dSAndreas Gruenbacher  * code upon failure.
47982c46407dSAndreas Gruenbacher  */
47992c46407dSAndreas Gruenbacher static int
480069a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4801b411b363SPhilipp Reisner 		struct p_compressed_bm *p,
4802c6d25cfeSPhilipp Reisner 		struct bm_xfer_ctx *c,
4803c6d25cfeSPhilipp Reisner 		unsigned int len)
4804b411b363SPhilipp Reisner {
4805a02d1240SAndreas Gruenbacher 	if (dcbp_get_code(p) == RLE_VLI_Bits)
480669a22773SAndreas Gruenbacher 		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4807b411b363SPhilipp Reisner 
4808b411b363SPhilipp Reisner 	/* other variants had been implemented for evaluation,
4809b411b363SPhilipp Reisner 	 * but have been dropped as this one turned out to be "best"
4810b411b363SPhilipp Reisner 	 * during all our tests. */
4811b411b363SPhilipp Reisner 
481269a22773SAndreas Gruenbacher 	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
481369a22773SAndreas Gruenbacher 	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
48142c46407dSAndreas Gruenbacher 	return -EIO;
4815b411b363SPhilipp Reisner }
4816b411b363SPhilipp Reisner 
4817b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device,
4818b411b363SPhilipp Reisner 		const char *direction, struct bm_xfer_ctx *c)
4819b411b363SPhilipp Reisner {
4820b411b363SPhilipp Reisner 	/* what would it take to transfer it "plaintext" */
4821a6b32bc3SAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
482250d0b1adSAndreas Gruenbacher 	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
482350d0b1adSAndreas Gruenbacher 	unsigned int plain =
482450d0b1adSAndreas Gruenbacher 		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
482550d0b1adSAndreas Gruenbacher 		c->bm_words * sizeof(unsigned long);
482650d0b1adSAndreas Gruenbacher 	unsigned int total = c->bytes[0] + c->bytes[1];
482750d0b1adSAndreas Gruenbacher 	unsigned int r;
4828b411b363SPhilipp Reisner 
4829b411b363SPhilipp Reisner 	/* total can not be zero. but just in case: */
4830b411b363SPhilipp Reisner 	if (total == 0)
4831b411b363SPhilipp Reisner 		return;
4832b411b363SPhilipp Reisner 
4833b411b363SPhilipp Reisner 	/* don't report if not compressed */
4834b411b363SPhilipp Reisner 	if (total >= plain)
4835b411b363SPhilipp Reisner 		return;
4836b411b363SPhilipp Reisner 
4837b411b363SPhilipp Reisner 	/* total < plain. check for overflow, still */
4838b411b363SPhilipp Reisner 	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4839b411b363SPhilipp Reisner 		                    : (1000 * total / plain);
4840b411b363SPhilipp Reisner 
4841b411b363SPhilipp Reisner 	if (r > 1000)
4842b411b363SPhilipp Reisner 		r = 1000;
4843b411b363SPhilipp Reisner 
4844b411b363SPhilipp Reisner 	r = 1000 - r;
4845d0180171SAndreas Gruenbacher 	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4846b411b363SPhilipp Reisner 	     "total %u; compression: %u.%u%%\n",
4847b411b363SPhilipp Reisner 			direction,
4848b411b363SPhilipp Reisner 			c->bytes[1], c->packets[1],
4849b411b363SPhilipp Reisner 			c->bytes[0], c->packets[0],
4850b411b363SPhilipp Reisner 			total, r/10, r % 10);
4851b411b363SPhilipp Reisner }
4852b411b363SPhilipp Reisner 
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Returns 0 on success, a negative error code otherwise. */
4861bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4862b411b363SPhilipp Reisner {
48639f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4864b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4865b411b363SPhilipp Reisner 	struct bm_xfer_ctx c;
48662c46407dSAndreas Gruenbacher 	int err;
48674a76b161SAndreas Gruenbacher 
48689f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
48699f4fe9adSAndreas Gruenbacher 	if (!peer_device)
48704a76b161SAndreas Gruenbacher 		return -EIO;
48719f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
4872b411b363SPhilipp Reisner 
4873b30ab791SAndreas Gruenbacher 	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
487420ceb2b2SLars Ellenberg 	/* you are supposed to send additional out-of-sync information
487520ceb2b2SLars Ellenberg 	 * if you actually set bits during this phase */
4876b411b363SPhilipp Reisner 
4877b411b363SPhilipp Reisner 	c = (struct bm_xfer_ctx) {
4878b30ab791SAndreas Gruenbacher 		.bm_bits = drbd_bm_bits(device),
4879b30ab791SAndreas Gruenbacher 		.bm_words = drbd_bm_words(device),
4880b411b363SPhilipp Reisner 	};
4881b411b363SPhilipp Reisner 
48822c46407dSAndreas Gruenbacher 	for(;;) {
4883e658983aSAndreas Gruenbacher 		if (pi->cmd == P_BITMAP)
488469a22773SAndreas Gruenbacher 			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4885e658983aSAndreas Gruenbacher 		else if (pi->cmd == P_COMPRESSED_BITMAP) {
4886b411b363SPhilipp Reisner 			/* MAYBE: sanity check that we speak proto >= 90,
4887b411b363SPhilipp Reisner 			 * and the feature is enabled! */
4888e658983aSAndreas Gruenbacher 			struct p_compressed_bm *p = pi->data;
4889b411b363SPhilipp Reisner 
4890bde89a9eSAndreas Gruenbacher 			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4891d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too large\n");
489282bc0194SAndreas Gruenbacher 				err = -EIO;
4893b411b363SPhilipp Reisner 				goto out;
4894b411b363SPhilipp Reisner 			}
4895e658983aSAndreas Gruenbacher 			if (pi->size <= sizeof(*p)) {
4896d0180171SAndreas Gruenbacher 				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
489782bc0194SAndreas Gruenbacher 				err = -EIO;
489878fcbdaeSAndreas Gruenbacher 				goto out;
4899b411b363SPhilipp Reisner 			}
49009f4fe9adSAndreas Gruenbacher 			err = drbd_recv_all(peer_device->connection, p, pi->size);
4901e658983aSAndreas Gruenbacher 			if (err)
4902e658983aSAndreas Gruenbacher 			       goto out;
490369a22773SAndreas Gruenbacher 			err = decode_bitmap_c(peer_device, p, &c, pi->size);
4904b411b363SPhilipp Reisner 		} else {
4905d0180171SAndreas Gruenbacher 			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
490682bc0194SAndreas Gruenbacher 			err = -EIO;
4907b411b363SPhilipp Reisner 			goto out;
4908b411b363SPhilipp Reisner 		}
4909b411b363SPhilipp Reisner 
4910e2857216SAndreas Gruenbacher 		c.packets[pi->cmd == P_BITMAP]++;
4911bde89a9eSAndreas Gruenbacher 		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4912b411b363SPhilipp Reisner 
49132c46407dSAndreas Gruenbacher 		if (err <= 0) {
49142c46407dSAndreas Gruenbacher 			if (err < 0)
49152c46407dSAndreas Gruenbacher 				goto out;
4916b411b363SPhilipp Reisner 			break;
49172c46407dSAndreas Gruenbacher 		}
49189f4fe9adSAndreas Gruenbacher 		err = drbd_recv_header(peer_device->connection, pi);
491982bc0194SAndreas Gruenbacher 		if (err)
4920b411b363SPhilipp Reisner 			goto out;
49212c46407dSAndreas Gruenbacher 	}
4922b411b363SPhilipp Reisner 
4923b30ab791SAndreas Gruenbacher 	INFO_bm_xfer_stats(device, "receive", &c);
4924b411b363SPhilipp Reisner 
4925b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_WF_BITMAP_T) {
4926de1f8e4aSAndreas Gruenbacher 		enum drbd_state_rv rv;
4927de1f8e4aSAndreas Gruenbacher 
4928b30ab791SAndreas Gruenbacher 		err = drbd_send_bitmap(device);
492982bc0194SAndreas Gruenbacher 		if (err)
4930b411b363SPhilipp Reisner 			goto out;
4931b411b363SPhilipp Reisner 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4932b30ab791SAndreas Gruenbacher 		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
49330b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, rv == SS_SUCCESS);
4934b30ab791SAndreas Gruenbacher 	} else if (device->state.conn != C_WF_BITMAP_S) {
4935b411b363SPhilipp Reisner 		/* admin may have requested C_DISCONNECTING,
4936b411b363SPhilipp Reisner 		 * other threads may have noticed network errors */
4937d0180171SAndreas Gruenbacher 		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4938b30ab791SAndreas Gruenbacher 		    drbd_conn_str(device->state.conn));
4939b411b363SPhilipp Reisner 	}
494082bc0194SAndreas Gruenbacher 	err = 0;
4941b411b363SPhilipp Reisner 
4942b411b363SPhilipp Reisner  out:
4943b30ab791SAndreas Gruenbacher 	drbd_bm_unlock(device);
4944b30ab791SAndreas Gruenbacher 	if (!err && device->state.conn == C_WF_BITMAP_S)
4945b30ab791SAndreas Gruenbacher 		drbd_start_resync(device, C_SYNC_SOURCE);
494682bc0194SAndreas Gruenbacher 	return err;
4947b411b363SPhilipp Reisner }
4948b411b363SPhilipp Reisner 
4949bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
4950b411b363SPhilipp Reisner {
49511ec861ebSAndreas Gruenbacher 	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
4952e2857216SAndreas Gruenbacher 		 pi->cmd, pi->size);
4953b411b363SPhilipp Reisner 
4954bde89a9eSAndreas Gruenbacher 	return ignore_remaining_packet(connection, pi);
4955b411b363SPhilipp Reisner }
4956b411b363SPhilipp Reisner 
/* Handler for P_UNPLUG_REMOTE; the packet carries no payload. */
static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	/* nothing else to do; always succeeds */
	return 0;
}
4965b411b363SPhilipp Reisner 
4966bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
496773a01a18SPhilipp Reisner {
49689f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
4969b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
4970e658983aSAndreas Gruenbacher 	struct p_block_desc *p = pi->data;
49714a76b161SAndreas Gruenbacher 
49729f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
49739f4fe9adSAndreas Gruenbacher 	if (!peer_device)
49744a76b161SAndreas Gruenbacher 		return -EIO;
49759f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
497673a01a18SPhilipp Reisner 
4977b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
4978f735e363SLars Ellenberg 	case C_WF_SYNC_UUID:
4979f735e363SLars Ellenberg 	case C_WF_BITMAP_T:
4980f735e363SLars Ellenberg 	case C_BEHIND:
4981f735e363SLars Ellenberg 			break;
4982f735e363SLars Ellenberg 	default:
4983d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4984b30ab791SAndreas Gruenbacher 				drbd_conn_str(device->state.conn));
4985f735e363SLars Ellenberg 	}
4986f735e363SLars Ellenberg 
4987b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
498873a01a18SPhilipp Reisner 
498982bc0194SAndreas Gruenbacher 	return 0;
499073a01a18SPhilipp Reisner }
499173a01a18SPhilipp Reisner 
/* Handler for P_RS_DEALLOCATED: the resync peer tells us this block is
 * deallocated (thinly provisioned / zeroed) on its side, so instead of
 * receiving the data we zero the range locally with REQ_OP_WRITE_ZEROES.
 * Returns 0 on success (including the degraded no-local-disk path),
 * -EIO for an unknown volume, -ENOMEM on allocation failure. */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;
		const int op = REQ_OP_WRITE_ZEROES;

		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);

		if (err) {
			/* submission failed: unhook the request again and fall
			 * through to the negative-ack path below */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;	/* reported to the peer via P_NEG_ACK instead */
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		/* NOTE: 'goto fail' above deliberately jumps into this else
		 * branch: no local submission happened, send a negative ack */
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
5057700ca8c0SPhilipp Reisner 
/* Dispatch descriptor for one packet type; see drbd_cmd_handler[] and drbdd(). */
struct data_cmd {
	int expect_payload;	/* non-zero: data beyond pkt_size is permitted */
	unsigned int pkt_size;	/* fixed sub-header size read before calling fn */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler, 0 on success */
};
5063b411b363SPhilipp Reisner 
/* Packet-type -> { expect_payload, sub-header size, handler } dispatch table.
 * Entries with no handler (and command numbers beyond the table) are rejected
 * by drbdd() as protocol errors. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
};
509502918be2SPhilipp Reisner 
5096bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5097b411b363SPhilipp Reisner {
509877351055SPhilipp Reisner 	struct packet_info pi;
509902918be2SPhilipp Reisner 	size_t shs; /* sub header size */
510082bc0194SAndreas Gruenbacher 	int err;
5101b411b363SPhilipp Reisner 
5102bde89a9eSAndreas Gruenbacher 	while (get_t_state(&connection->receiver) == RUNNING) {
51039104d31aSLars Ellenberg 		struct data_cmd const *cmd;
5104deebe195SAndreas Gruenbacher 
5105bde89a9eSAndreas Gruenbacher 		drbd_thread_current_set_cpu(&connection->receiver);
5106c51a0ef3SLars Ellenberg 		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5107c51a0ef3SLars Ellenberg 		if (drbd_recv_header_maybe_unplug(connection, &pi))
510802918be2SPhilipp Reisner 			goto err_out;
510902918be2SPhilipp Reisner 
5110deebe195SAndreas Gruenbacher 		cmd = &drbd_cmd_handler[pi.cmd];
51114a76b161SAndreas Gruenbacher 		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
51121ec861ebSAndreas Gruenbacher 			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
51132fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.cmd);
511402918be2SPhilipp Reisner 			goto err_out;
51150b33a916SLars Ellenberg 		}
5116b411b363SPhilipp Reisner 
5117e658983aSAndreas Gruenbacher 		shs = cmd->pkt_size;
51189104d31aSLars Ellenberg 		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
51199104d31aSLars Ellenberg 			shs += sizeof(struct o_qlim);
5120e658983aSAndreas Gruenbacher 		if (pi.size > shs && !cmd->expect_payload) {
51211ec861ebSAndreas Gruenbacher 			drbd_err(connection, "No payload expected %s l:%d\n",
51222fcb8f30SAndreas Gruenbacher 				 cmdname(pi.cmd), pi.size);
5123c13f7e1aSLars Ellenberg 			goto err_out;
5124c13f7e1aSLars Ellenberg 		}
51259104d31aSLars Ellenberg 		if (pi.size < shs) {
51269104d31aSLars Ellenberg 			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
51279104d31aSLars Ellenberg 				 cmdname(pi.cmd), (int)shs, pi.size);
51289104d31aSLars Ellenberg 			goto err_out;
51299104d31aSLars Ellenberg 		}
5130c13f7e1aSLars Ellenberg 
5131c13f7e1aSLars Ellenberg 		if (shs) {
5132944410e9SLars Ellenberg 			update_receiver_timing_details(connection, drbd_recv_all_warn);
5133bde89a9eSAndreas Gruenbacher 			err = drbd_recv_all_warn(connection, pi.data, shs);
5134a5c31904SAndreas Gruenbacher 			if (err)
513502918be2SPhilipp Reisner 				goto err_out;
5136e2857216SAndreas Gruenbacher 			pi.size -= shs;
5137b411b363SPhilipp Reisner 		}
513802918be2SPhilipp Reisner 
5139944410e9SLars Ellenberg 		update_receiver_timing_details(connection, cmd->fn);
5140bde89a9eSAndreas Gruenbacher 		err = cmd->fn(connection, &pi);
51414a76b161SAndreas Gruenbacher 		if (err) {
51421ec861ebSAndreas Gruenbacher 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
51439f5bdc33SAndreas Gruenbacher 				 cmdname(pi.cmd), err, pi.size);
514402918be2SPhilipp Reisner 			goto err_out;
514502918be2SPhilipp Reisner 		}
514602918be2SPhilipp Reisner 	}
514782bc0194SAndreas Gruenbacher 	return;
514802918be2SPhilipp Reisner 
514902918be2SPhilipp Reisner     err_out:
5150bde89a9eSAndreas Gruenbacher 	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5151b411b363SPhilipp Reisner }
5152b411b363SPhilipp Reisner 
/* Tear down a lost connection: stop the ack receiver/sender, close the
 * sockets, run per-volume cleanup, and walk the connection state down to
 * C_UNCONNECTED (or C_STANDALONE if the admin asked for a disconnect). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* drbd_disconnected() may sleep, so drop the RCU read lock while
	 * calling it; the kref keeps the device alive across that window. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* a Primary with an unreachable peer may need to fence it */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5209360cc740SPhilipp Reisner 
/* Per-volume cleanup after connection loss: wait for in-flight peer
 * requests, cancel resync bookkeeping, flush the sender work queue,
 * clear the transfer log, sync the meta data and write out the bitmap.
 * Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the timer, then run its handler once more by hand */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
5295b411b363SPhilipp Reisner 
5296b411b363SPhilipp Reisner /*
5297b411b363SPhilipp Reisner  * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5298b411b363SPhilipp Reisner  * we can agree on is stored in agreed_pro_version.
5299b411b363SPhilipp Reisner  *
5300b411b363SPhilipp Reisner  * feature flags and the reserved array should be enough room for future
5301b411b363SPhilipp Reisner  * enhancements of the handshake protocol, and possible plugins...
5302b411b363SPhilipp Reisner  *
5303b411b363SPhilipp Reisner  * for now, they are expected to be zero, but ignored.
5304b411b363SPhilipp Reisner  */
5305bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5306b411b363SPhilipp Reisner {
53079f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
53089f5bdc33SAndreas Gruenbacher 	struct p_connection_features *p;
5309b411b363SPhilipp Reisner 
5310bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
5311bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
53129f5bdc33SAndreas Gruenbacher 	if (!p)
5313e8d17b01SAndreas Gruenbacher 		return -EIO;
5314b411b363SPhilipp Reisner 	memset(p, 0, sizeof(*p));
5315b411b363SPhilipp Reisner 	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5316b411b363SPhilipp Reisner 	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
531720c68fdeSLars Ellenberg 	p->feature_flags = cpu_to_be32(PRO_FEATURES);
5318bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5319b411b363SPhilipp Reisner }
5320b411b363SPhilipp Reisner 
/*
 * drbd_do_features() - exchange and validate P_CONNECTION_FEATURES packets
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our own feature packet first; a send failure is treated as
	 * transient (return 0 -> caller retries). */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	/* Anything other than the features packet at this point means the
	 * peer speaks a different dialect: give up permanently. */
	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert in place; p points into the receive buffer. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* Version ranges must overlap, otherwise we cannot talk at all. */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest protocol version and the intersection of
	 * the advertised feature flags. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5393b411b363SPhilipp Reisner 
5394b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks HMAC support: authentication can never
 * succeed, so tell the admin to turn it off.  Returns -1 ("auth failed,
 * don't try again").
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* fix grammar of the user-visible message ("was build" -> "was built") */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5401b411b363SPhilipp Reisner #else
5402b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5403b10d96cbSJohannes Thoma 
5404b10d96cbSJohannes Thoma /* Return value:
5405b10d96cbSJohannes Thoma 	1 - auth succeeded,
5406b10d96cbSJohannes Thoma 	0 - failed, try again (network error),
5407b10d96cbSJohannes Thoma 	-1 - auth failed, don't try again.
5408b10d96cbSJohannes Thoma */
5409b10d96cbSJohannes Thoma 
/*
 * drbd_do_auth() - CRAM-HMAC mutual challenge/response authentication
 *
 * Both sides send a random challenge and must return the HMAC of the
 * peer's challenge keyed with the shared secret.  See the return-value
 * comment above: 1 = success, 0 = retry (network error), -1 = give up.
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret to the stack under rcu_read_lock();
	 * net_conf may be replaced concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* Send our random challenge to the peer. */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Receive the peer's challenge; network errors -> retry (rv = 0). */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;
		goto fail;
	}

	/* Sanity-limit the peer's challenge size before allocating. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer echoing our own challenge would let it replay our
	 * response back to us; refuse. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	/* response = HMAC(secret, peers_ch) */
	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Now receive and verify the peer's response to OUR challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	/* right_response = HMAC(secret, my_challenge): what the peer
	 * must have sent if it knows the same secret. */
	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	/* wipe the key material held in the shash descriptor */
	shash_desc_zero(desc);

	return rv;
}
5578b411b363SPhilipp Reisner #endif
5579b411b363SPhilipp Reisner 
55808fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5581b411b363SPhilipp Reisner {
5582bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
5583b411b363SPhilipp Reisner 	int h;
5584b411b363SPhilipp Reisner 
55851ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver (re)started\n");
5586b411b363SPhilipp Reisner 
5587b411b363SPhilipp Reisner 	do {
5588bde89a9eSAndreas Gruenbacher 		h = conn_connect(connection);
5589b411b363SPhilipp Reisner 		if (h == 0) {
5590bde89a9eSAndreas Gruenbacher 			conn_disconnect(connection);
559120ee6390SPhilipp Reisner 			schedule_timeout_interruptible(HZ);
5592b411b363SPhilipp Reisner 		}
5593b411b363SPhilipp Reisner 		if (h == -1) {
55941ec861ebSAndreas Gruenbacher 			drbd_warn(connection, "Discarding network configuration.\n");
5595bde89a9eSAndreas Gruenbacher 			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5596b411b363SPhilipp Reisner 		}
5597b411b363SPhilipp Reisner 	} while (h == 0);
5598b411b363SPhilipp Reisner 
5599c51a0ef3SLars Ellenberg 	if (h > 0) {
5600c51a0ef3SLars Ellenberg 		blk_start_plug(&connection->receiver_plug);
5601bde89a9eSAndreas Gruenbacher 		drbdd(connection);
5602c51a0ef3SLars Ellenberg 		blk_finish_plug(&connection->receiver_plug);
5603c51a0ef3SLars Ellenberg 	}
5604b411b363SPhilipp Reisner 
5605bde89a9eSAndreas Gruenbacher 	conn_disconnect(connection);
5606b411b363SPhilipp Reisner 
56071ec861ebSAndreas Gruenbacher 	drbd_info(connection, "receiver terminated\n");
5608b411b363SPhilipp Reisner 	return 0;
5609b411b363SPhilipp Reisner }
5610b411b363SPhilipp Reisner 
5611b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5612b411b363SPhilipp Reisner 
5613bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5614b411b363SPhilipp Reisner {
5615e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5616b411b363SPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5617b411b363SPhilipp Reisner 
5618b411b363SPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5619bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5620b411b363SPhilipp Reisner 	} else {
5621bde89a9eSAndreas Gruenbacher 		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
56221ec861ebSAndreas Gruenbacher 		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5623fc3b10a4SPhilipp Reisner 			 drbd_set_st_err_str(retcode), retcode);
5624fc3b10a4SPhilipp Reisner 	}
5625bde89a9eSAndreas Gruenbacher 	wake_up(&connection->ping_wait);
5626e4f78edeSPhilipp Reisner 
56272735a594SAndreas Gruenbacher 	return 0;
5628fc3b10a4SPhilipp Reisner }
5629e4f78edeSPhilipp Reisner 
5630bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5631e4f78edeSPhilipp Reisner {
56329f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5633b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5634e658983aSAndreas Gruenbacher 	struct p_req_state_reply *p = pi->data;
5635e4f78edeSPhilipp Reisner 	int retcode = be32_to_cpu(p->retcode);
5636e4f78edeSPhilipp Reisner 
56379f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
56389f4fe9adSAndreas Gruenbacher 	if (!peer_device)
56392735a594SAndreas Gruenbacher 		return -EIO;
56409f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
56411952e916SAndreas Gruenbacher 
5642bde89a9eSAndreas Gruenbacher 	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
56430b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, connection->agreed_pro_version < 100);
5644bde89a9eSAndreas Gruenbacher 		return got_conn_RqSReply(connection, pi);
56454d0fc3fdSPhilipp Reisner 	}
56464d0fc3fdSPhilipp Reisner 
5647e4f78edeSPhilipp Reisner 	if (retcode >= SS_SUCCESS) {
5648b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5649e4f78edeSPhilipp Reisner 	} else {
5650b30ab791SAndreas Gruenbacher 		set_bit(CL_ST_CHG_FAIL, &device->flags);
5651d0180171SAndreas Gruenbacher 		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5652b411b363SPhilipp Reisner 			drbd_set_st_err_str(retcode), retcode);
5653b411b363SPhilipp Reisner 	}
5654b30ab791SAndreas Gruenbacher 	wake_up(&device->state_wait);
5655b411b363SPhilipp Reisner 
56562735a594SAndreas Gruenbacher 	return 0;
5657b411b363SPhilipp Reisner }
5658b411b363SPhilipp Reisner 
/* P_PING handler: simply answer with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}
5664b411b363SPhilipp Reisner 
5665bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5666b411b363SPhilipp Reisner {
5667b411b363SPhilipp Reisner 	/* restore idle timeout */
5668bde89a9eSAndreas Gruenbacher 	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5669bde89a9eSAndreas Gruenbacher 	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5670bde89a9eSAndreas Gruenbacher 		wake_up(&connection->ping_wait);
5671b411b363SPhilipp Reisner 
56722735a594SAndreas Gruenbacher 	return 0;
5673b411b363SPhilipp Reisner }
5674b411b363SPhilipp Reisner 
/* P_RS_IS_IN_SYNC handler: the peer confirmed a checksum-based resync
 * block is already in sync, so mark it clean locally. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* checksum-based resync requires protocol version >= 89 */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* only touch the bitmap/accounting if the local disk is attached */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	/* resync-pending and sector-in accounting happen regardless of ldev */
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5704b411b363SPhilipp Reisner 
/*
 * Look up the request for (id, sector) in @root under req_lock, apply the
 * request state transition @what to it, and complete the master bio if the
 * transition finished it.  @missing_ok controls whether a failed lookup is
 * tolerated by find_request().  Returns 0 on success, -EIO if the request
 * could not be found.
 */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	/* __req_mod() records a bio to complete in m; do that outside the lock */
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5726b411b363SPhilipp Reisner 
/* Handler for the various positive write acknowledgments; maps the packet
 * type to the corresponding request state event. */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* acks for resync writes carry ID_SYNCER instead of a request id */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		/* dispatch table guarantees only the above commands reach us */
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5772b411b363SPhilipp Reisner 
5773bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
5774b411b363SPhilipp Reisner {
57759f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5776b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5777e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5778b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
57792deb8336SPhilipp Reisner 	int size = be32_to_cpu(p->blksize);
578085997675SAndreas Gruenbacher 	int err;
5781b411b363SPhilipp Reisner 
57829f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
57839f4fe9adSAndreas Gruenbacher 	if (!peer_device)
57842735a594SAndreas Gruenbacher 		return -EIO;
57859f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5786b411b363SPhilipp Reisner 
578769a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5788b411b363SPhilipp Reisner 
5789579b57edSAndreas Gruenbacher 	if (p->block_id == ID_SYNCER) {
5790b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
5791b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, sector, size);
57922735a594SAndreas Gruenbacher 		return 0;
5793b411b363SPhilipp Reisner 	}
57942deb8336SPhilipp Reisner 
5795b30ab791SAndreas Gruenbacher 	err = validate_req_change_req_state(device, p->block_id, sector,
5796b30ab791SAndreas Gruenbacher 					    &device->write_requests, __func__,
5797303d1448SPhilipp Reisner 					    NEG_ACKED, true);
579885997675SAndreas Gruenbacher 	if (err) {
57992deb8336SPhilipp Reisner 		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
58002deb8336SPhilipp Reisner 		   The master bio might already be completed, therefore the
5801c3afd8f5SAndreas Gruenbacher 		   request is no longer in the collision hash. */
58022deb8336SPhilipp Reisner 		/* In Protocol B we might already have got a P_RECV_ACK
58032deb8336SPhilipp Reisner 		   but then get a P_NEG_ACK afterwards. */
5804b30ab791SAndreas Gruenbacher 		drbd_set_out_of_sync(device, sector, size);
58052deb8336SPhilipp Reisner 	}
58062735a594SAndreas Gruenbacher 	return 0;
5807b411b363SPhilipp Reisner }
5808b411b363SPhilipp Reisner 
5809bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5810b411b363SPhilipp Reisner {
58119f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5812b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5813e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
5814b411b363SPhilipp Reisner 	sector_t sector = be64_to_cpu(p->sector);
5815b411b363SPhilipp Reisner 
58169f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58179f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58182735a594SAndreas Gruenbacher 		return -EIO;
58199f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
58201952e916SAndreas Gruenbacher 
582169a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
58227be8da07SAndreas Gruenbacher 
5823d0180171SAndreas Gruenbacher 	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5824b411b363SPhilipp Reisner 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
5825b411b363SPhilipp Reisner 
5826b30ab791SAndreas Gruenbacher 	return validate_req_change_req_state(device, p->block_id, sector,
5827b30ab791SAndreas Gruenbacher 					     &device->read_requests, __func__,
58288554df1cSAndreas Gruenbacher 					     NEG_ACKED, false);
5829b411b363SPhilipp Reisner }
5830b411b363SPhilipp Reisner 
5831bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
5832b411b363SPhilipp Reisner {
58339f4fe9adSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
5834b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
5835b411b363SPhilipp Reisner 	sector_t sector;
5836b411b363SPhilipp Reisner 	int size;
5837e658983aSAndreas Gruenbacher 	struct p_block_ack *p = pi->data;
58381952e916SAndreas Gruenbacher 
58399f4fe9adSAndreas Gruenbacher 	peer_device = conn_peer_device(connection, pi->vnr);
58409f4fe9adSAndreas Gruenbacher 	if (!peer_device)
58412735a594SAndreas Gruenbacher 		return -EIO;
58429f4fe9adSAndreas Gruenbacher 	device = peer_device->device;
5843b411b363SPhilipp Reisner 
5844b411b363SPhilipp Reisner 	sector = be64_to_cpu(p->sector);
5845b411b363SPhilipp Reisner 	size = be32_to_cpu(p->blksize);
5846b411b363SPhilipp Reisner 
584769a22773SAndreas Gruenbacher 	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5848b411b363SPhilipp Reisner 
5849b30ab791SAndreas Gruenbacher 	dec_rs_pending(device);
5850b411b363SPhilipp Reisner 
5851b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
5852b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, sector);
5853e05e1e59SAndreas Gruenbacher 		switch (pi->cmd) {
5854d612d309SPhilipp Reisner 		case P_NEG_RS_DREPLY:
5855b30ab791SAndreas Gruenbacher 			drbd_rs_failed_io(device, sector, size);
5856d612d309SPhilipp Reisner 		case P_RS_CANCEL:
5857d612d309SPhilipp Reisner 			break;
5858d612d309SPhilipp Reisner 		default:
58592735a594SAndreas Gruenbacher 			BUG();
5860d612d309SPhilipp Reisner 		}
5861b30ab791SAndreas Gruenbacher 		put_ldev(device);
5862b411b363SPhilipp Reisner 	}
5863b411b363SPhilipp Reisner 
58642735a594SAndreas Gruenbacher 	return 0;
5865b411b363SPhilipp Reisner }
5866b411b363SPhilipp Reisner 
/* P_BARRIER_ACK handler: release the transfer-log epoch the peer confirmed,
 * and kick Ahead->SyncSource transitions for devices that drained. */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* walk all volumes of this connection under RCU */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* device is in Ahead mode, nothing application-pending in
		 * flight, and the transition was not already scheduled:
		 * arm the resync start timer */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5890b411b363SPhilipp Reisner 
/* P_OV_RESULT handler: record the outcome of one online-verify block and,
 * when the last block is in, queue the verify-finished work. */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		/* block verified OK: flush any pending out-of-sync report */
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		/* all blocks answered: hand the finish-up to the worker */
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* no memory for the work item: finish synchronously */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5942b411b363SPhilipp Reisner 
/* Handler for meta packets we deliberately ignore (see ack_receiver_tbl,
 * e.g. P_DELAY_PROBE): the payload has already been received; just
 * report success. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
59470ced55a3SPhilipp Reisner 
/* One entry of the ack receiver's dispatch table: the expected payload
 * size (bytes following the packet header) and the handler to call once
 * a packet of that type has been fully received. */
struct meta_sock_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5952b411b363SPhilipp Reisner 
5953668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5954668700b4SPhilipp Reisner {
5955668700b4SPhilipp Reisner 	long t;
5956668700b4SPhilipp Reisner 	struct net_conf *nc;
5957668700b4SPhilipp Reisner 
5958668700b4SPhilipp Reisner 	rcu_read_lock();
5959668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
5960668700b4SPhilipp Reisner 	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5961668700b4SPhilipp Reisner 	rcu_read_unlock();
5962668700b4SPhilipp Reisner 
5963668700b4SPhilipp Reisner 	t *= HZ;
5964668700b4SPhilipp Reisner 	if (ping_timeout)
5965668700b4SPhilipp Reisner 		t /= 10;
5966668700b4SPhilipp Reisner 
5967668700b4SPhilipp Reisner 	connection->meta.socket->sk->sk_rcvtimeo = t;
5968668700b4SPhilipp Reisner }
5969668700b4SPhilipp Reisner 
/* Arm the short receive timeout used while waiting for a PingAck. */
static void set_ping_timeout(struct drbd_connection *connection)
{
	set_rcvtimeo(connection, true);
}
5974668700b4SPhilipp Reisner 
/* Fall back to the long, idle-connection receive timeout. */
static void set_idle_timeout(struct drbd_connection *connection)
{
	set_rcvtimeo(connection, false);
}
5979668700b4SPhilipp Reisner 
/* Packet-type -> {payload size, handler} dispatch table for the ack
 * receiver thread.  Indexed by the packet command; any index outside
 * the table or with a NULL ->fn is rejected in drbd_ack_receiver(). */
static struct meta_sock_cmd ack_receiver_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
5999b411b363SPhilipp Reisner 
60001c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
6001b411b363SPhilipp Reisner {
6002bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
6003668700b4SPhilipp Reisner 	struct meta_sock_cmd *cmd = NULL;
600477351055SPhilipp Reisner 	struct packet_info pi;
6005668700b4SPhilipp Reisner 	unsigned long pre_recv_jif;
6006257d0af6SPhilipp Reisner 	int rv;
6007bde89a9eSAndreas Gruenbacher 	void *buf    = connection->meta.rbuf;
6008b411b363SPhilipp Reisner 	int received = 0;
6009bde89a9eSAndreas Gruenbacher 	unsigned int header_size = drbd_header_size(connection);
601052b061a4SAndreas Gruenbacher 	int expect   = header_size;
601144ed167dSPhilipp Reisner 	bool ping_timeout_active = false;
60123990e04dSPhilipp Reisner 	struct sched_param param = { .sched_priority = 2 };
6013b411b363SPhilipp Reisner 
60143990e04dSPhilipp Reisner 	rv = sched_setscheduler(current, SCHED_RR, &param);
60153990e04dSPhilipp Reisner 	if (rv < 0)
6016668700b4SPhilipp Reisner 		drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
6017b411b363SPhilipp Reisner 
6018e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
601980822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
602044ed167dSPhilipp Reisner 
6021668700b4SPhilipp Reisner 		conn_reclaim_net_peer_reqs(connection);
602244ed167dSPhilipp Reisner 
6023bde89a9eSAndreas Gruenbacher 		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
6024bde89a9eSAndreas Gruenbacher 			if (drbd_send_ping(connection)) {
60251ec861ebSAndreas Gruenbacher 				drbd_err(connection, "drbd_send_ping has failed\n");
6026841ce241SAndreas Gruenbacher 				goto reconnect;
6027841ce241SAndreas Gruenbacher 			}
6028668700b4SPhilipp Reisner 			set_ping_timeout(connection);
602944ed167dSPhilipp Reisner 			ping_timeout_active = true;
6030b411b363SPhilipp Reisner 		}
6031b411b363SPhilipp Reisner 
6032668700b4SPhilipp Reisner 		pre_recv_jif = jiffies;
6033bde89a9eSAndreas Gruenbacher 		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
6034b411b363SPhilipp Reisner 
6035b411b363SPhilipp Reisner 		/* Note:
6036b411b363SPhilipp Reisner 		 * -EINTR	 (on meta) we got a signal
6037b411b363SPhilipp Reisner 		 * -EAGAIN	 (on meta) rcvtimeo expired
6038b411b363SPhilipp Reisner 		 * -ECONNRESET	 other side closed the connection
6039b411b363SPhilipp Reisner 		 * -ERESTARTSYS  (on data) we got a signal
6040b411b363SPhilipp Reisner 		 * rv <  0	 other than above: unexpected error!
6041b411b363SPhilipp Reisner 		 * rv == expected: full header or command
6042b411b363SPhilipp Reisner 		 * rv <  expected: "woken" by signal during receive
6043b411b363SPhilipp Reisner 		 * rv == 0	 : "connection shut down by peer"
6044b411b363SPhilipp Reisner 		 */
6045b411b363SPhilipp Reisner 		if (likely(rv > 0)) {
6046b411b363SPhilipp Reisner 			received += rv;
6047b411b363SPhilipp Reisner 			buf	 += rv;
6048b411b363SPhilipp Reisner 		} else if (rv == 0) {
6049bde89a9eSAndreas Gruenbacher 			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6050b66623e3SPhilipp Reisner 				long t;
6051b66623e3SPhilipp Reisner 				rcu_read_lock();
6052bde89a9eSAndreas Gruenbacher 				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6053b66623e3SPhilipp Reisner 				rcu_read_unlock();
6054b66623e3SPhilipp Reisner 
6055bde89a9eSAndreas Gruenbacher 				t = wait_event_timeout(connection->ping_wait,
6056bde89a9eSAndreas Gruenbacher 						       connection->cstate < C_WF_REPORT_PARAMS,
6057b66623e3SPhilipp Reisner 						       t);
6058599377acSPhilipp Reisner 				if (t)
6059599377acSPhilipp Reisner 					break;
6060599377acSPhilipp Reisner 			}
60611ec861ebSAndreas Gruenbacher 			drbd_err(connection, "meta connection shut down by peer.\n");
6062b411b363SPhilipp Reisner 			goto reconnect;
6063b411b363SPhilipp Reisner 		} else if (rv == -EAGAIN) {
6064cb6518cbSLars Ellenberg 			/* If the data socket received something meanwhile,
6065cb6518cbSLars Ellenberg 			 * that is good enough: peer is still alive. */
6066668700b4SPhilipp Reisner 			if (time_after(connection->last_received, pre_recv_jif))
6067cb6518cbSLars Ellenberg 				continue;
6068f36af18cSLars Ellenberg 			if (ping_timeout_active) {
60691ec861ebSAndreas Gruenbacher 				drbd_err(connection, "PingAck did not arrive in time.\n");
6070b411b363SPhilipp Reisner 				goto reconnect;
6071b411b363SPhilipp Reisner 			}
6072bde89a9eSAndreas Gruenbacher 			set_bit(SEND_PING, &connection->flags);
6073b411b363SPhilipp Reisner 			continue;
6074b411b363SPhilipp Reisner 		} else if (rv == -EINTR) {
6075668700b4SPhilipp Reisner 			/* maybe drbd_thread_stop(): the while condition will notice.
6076668700b4SPhilipp Reisner 			 * maybe woken for send_ping: we'll send a ping above,
6077668700b4SPhilipp Reisner 			 * and change the rcvtimeo */
6078668700b4SPhilipp Reisner 			flush_signals(current);
6079b411b363SPhilipp Reisner 			continue;
6080b411b363SPhilipp Reisner 		} else {
60811ec861ebSAndreas Gruenbacher 			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6082b411b363SPhilipp Reisner 			goto reconnect;
6083b411b363SPhilipp Reisner 		}
6084b411b363SPhilipp Reisner 
6085b411b363SPhilipp Reisner 		if (received == expect && cmd == NULL) {
6086bde89a9eSAndreas Gruenbacher 			if (decode_header(connection, connection->meta.rbuf, &pi))
6087b411b363SPhilipp Reisner 				goto reconnect;
6088668700b4SPhilipp Reisner 			cmd = &ack_receiver_tbl[pi.cmd];
6089668700b4SPhilipp Reisner 			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60901ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60912fcb8f30SAndreas Gruenbacher 					 cmdname(pi.cmd), pi.cmd);
6092b411b363SPhilipp Reisner 				goto disconnect;
6093b411b363SPhilipp Reisner 			}
6094e658983aSAndreas Gruenbacher 			expect = header_size + cmd->pkt_size;
609552b061a4SAndreas Gruenbacher 			if (pi.size != expect - header_size) {
60961ec861ebSAndreas Gruenbacher 				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
609777351055SPhilipp Reisner 					pi.cmd, pi.size);
6098b411b363SPhilipp Reisner 				goto reconnect;
6099b411b363SPhilipp Reisner 			}
6100257d0af6SPhilipp Reisner 		}
6101b411b363SPhilipp Reisner 		if (received == expect) {
61022735a594SAndreas Gruenbacher 			bool err;
6103a4fbda8eSPhilipp Reisner 
6104bde89a9eSAndreas Gruenbacher 			err = cmd->fn(connection, &pi);
61052735a594SAndreas Gruenbacher 			if (err) {
6106d75f773cSSakari Ailus 				drbd_err(connection, "%ps failed\n", cmd->fn);
6107b411b363SPhilipp Reisner 				goto reconnect;
61081952e916SAndreas Gruenbacher 			}
6109b411b363SPhilipp Reisner 
6110bde89a9eSAndreas Gruenbacher 			connection->last_received = jiffies;
6111f36af18cSLars Ellenberg 
6112668700b4SPhilipp Reisner 			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6113668700b4SPhilipp Reisner 				set_idle_timeout(connection);
611444ed167dSPhilipp Reisner 				ping_timeout_active = false;
611544ed167dSPhilipp Reisner 			}
6116b411b363SPhilipp Reisner 
6117bde89a9eSAndreas Gruenbacher 			buf	 = connection->meta.rbuf;
6118b411b363SPhilipp Reisner 			received = 0;
611952b061a4SAndreas Gruenbacher 			expect	 = header_size;
6120b411b363SPhilipp Reisner 			cmd	 = NULL;
6121b411b363SPhilipp Reisner 		}
6122b411b363SPhilipp Reisner 	}
6123b411b363SPhilipp Reisner 
6124b411b363SPhilipp Reisner 	if (0) {
6125b411b363SPhilipp Reisner reconnect:
6126bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6127bde89a9eSAndreas Gruenbacher 		conn_md_sync(connection);
6128b411b363SPhilipp Reisner 	}
6129b411b363SPhilipp Reisner 	if (0) {
6130b411b363SPhilipp Reisner disconnect:
6131bde89a9eSAndreas Gruenbacher 		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6132b411b363SPhilipp Reisner 	}
6133b411b363SPhilipp Reisner 
6134668700b4SPhilipp Reisner 	drbd_info(connection, "ack_receiver terminated\n");
6135b411b363SPhilipp Reisner 
6136b411b363SPhilipp Reisner 	return 0;
6137b411b363SPhilipp Reisner }
6138668700b4SPhilipp Reisner 
6139668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6140668700b4SPhilipp Reisner {
6141668700b4SPhilipp Reisner 	struct drbd_peer_device *peer_device =
6142668700b4SPhilipp Reisner 		container_of(ws, struct drbd_peer_device, send_acks_work);
6143668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
6144668700b4SPhilipp Reisner 	struct drbd_device *device = peer_device->device;
6145668700b4SPhilipp Reisner 	struct net_conf *nc;
6146668700b4SPhilipp Reisner 	int tcp_cork, err;
6147668700b4SPhilipp Reisner 
6148668700b4SPhilipp Reisner 	rcu_read_lock();
6149668700b4SPhilipp Reisner 	nc = rcu_dereference(connection->net_conf);
6150668700b4SPhilipp Reisner 	tcp_cork = nc->tcp_cork;
6151668700b4SPhilipp Reisner 	rcu_read_unlock();
6152668700b4SPhilipp Reisner 
6153668700b4SPhilipp Reisner 	if (tcp_cork)
6154668700b4SPhilipp Reisner 		drbd_tcp_cork(connection->meta.socket);
6155668700b4SPhilipp Reisner 
6156668700b4SPhilipp Reisner 	err = drbd_finish_peer_reqs(device);
6157668700b4SPhilipp Reisner 	kref_put(&device->kref, drbd_destroy_device);
6158668700b4SPhilipp Reisner 	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6159668700b4SPhilipp Reisner 	   struct work_struct send_acks_work alive, which is in the peer_device object */
6160668700b4SPhilipp Reisner 
6161668700b4SPhilipp Reisner 	if (err) {
6162668700b4SPhilipp Reisner 		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6163668700b4SPhilipp Reisner 		return;
6164668700b4SPhilipp Reisner 	}
6165668700b4SPhilipp Reisner 
6166668700b4SPhilipp Reisner 	if (tcp_cork)
6167668700b4SPhilipp Reisner 		drbd_tcp_uncork(connection->meta.socket);
6168668700b4SPhilipp Reisner 
6169668700b4SPhilipp Reisner 	return;
6170668700b4SPhilipp Reisner }
6171