193c68cc4SChristoph Böhmwalder // SPDX-License-Identifier: GPL-2.0-only
2b411b363SPhilipp Reisner /*
3b411b363SPhilipp Reisner drbd_receiver.c
4b411b363SPhilipp Reisner
5b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
6b411b363SPhilipp Reisner
7b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
8b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
9b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
10b411b363SPhilipp Reisner
11b411b363SPhilipp Reisner */
12b411b363SPhilipp Reisner
13b411b363SPhilipp Reisner
14b411b363SPhilipp Reisner #include <linux/module.h>
15b411b363SPhilipp Reisner
167e5fec31SFabian Frederick #include <linux/uaccess.h>
17b411b363SPhilipp Reisner #include <net/sock.h>
18b411b363SPhilipp Reisner
19b411b363SPhilipp Reisner #include <linux/drbd.h>
20b411b363SPhilipp Reisner #include <linux/fs.h>
21b411b363SPhilipp Reisner #include <linux/file.h>
22b411b363SPhilipp Reisner #include <linux/in.h>
23b411b363SPhilipp Reisner #include <linux/mm.h>
24b411b363SPhilipp Reisner #include <linux/memcontrol.h>
25b411b363SPhilipp Reisner #include <linux/mm_inline.h>
26b411b363SPhilipp Reisner #include <linux/slab.h>
27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h>
28174cd4b1SIngo Molnar #include <linux/sched/signal.h>
29b411b363SPhilipp Reisner #include <linux/pkt_sched.h>
30b411b363SPhilipp Reisner #include <linux/unistd.h>
31b411b363SPhilipp Reisner #include <linux/vmalloc.h>
32b411b363SPhilipp Reisner #include <linux/random.h>
33b411b363SPhilipp Reisner #include <linux/string.h>
34b411b363SPhilipp Reisner #include <linux/scatterlist.h>
35c6a564ffSChristoph Hellwig #include <linux/part_stat.h>
36b411b363SPhilipp Reisner #include "drbd_int.h"
37a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
38b411b363SPhilipp Reisner #include "drbd_req.h"
39b411b363SPhilipp Reisner #include "drbd_vli.h"
40b411b363SPhilipp Reisner
41f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
4220c68fdeSLars Ellenberg
/* Decoded form of one received packet header, filled in by the receiver. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type taken from the wire header */
	unsigned int size;	/* payload size in bytes — presumably excludes the header itself; verify against decode_header() */
	unsigned int vnr;	/* volume number this packet is addressed to */
	void *data;		/* points into the receive buffer at the payload */
};
4977351055SPhilipp Reisner
/* Outcome of trying to finish a write epoch (see drbd_may_finish_epoch()). */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch still in use, nothing happened (inferred from name) */
	FE_DESTROYED,	/* epoch object was torn down */
	FE_RECYCLED,	/* epoch object was reused for the next epoch */
};
55b411b363SPhilipp Reisner
56bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection);
57bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection);
5869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *);
59a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection);
60bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
6199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int);
62b411b363SPhilipp Reisner
63b411b363SPhilipp Reisner
64b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
65b411b363SPhilipp Reisner
6645bb912bSLars Ellenberg /*
6745bb912bSLars Ellenberg * some helper functions to deal with single linked page lists,
6845bb912bSLars Ellenberg * page->private being our "next" pointer.
6945bb912bSLars Ellenberg */
7045bb912bSLars Ellenberg
7145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off.
7245bb912bSLars Ellenberg * Otherwise, don't modify head, and return NULL.
7345bb912bSLars Ellenberg * Locking is the responsibility of the caller.
7445bb912bSLars Ellenberg */
page_chain_del(struct page ** head,int n)7545bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n)
7645bb912bSLars Ellenberg {
7745bb912bSLars Ellenberg struct page *page;
7845bb912bSLars Ellenberg struct page *tmp;
7945bb912bSLars Ellenberg
8045bb912bSLars Ellenberg BUG_ON(!n);
8145bb912bSLars Ellenberg BUG_ON(!head);
8245bb912bSLars Ellenberg
8345bb912bSLars Ellenberg page = *head;
8423ce4227SPhilipp Reisner
8523ce4227SPhilipp Reisner if (!page)
8623ce4227SPhilipp Reisner return NULL;
8723ce4227SPhilipp Reisner
8845bb912bSLars Ellenberg while (page) {
8945bb912bSLars Ellenberg tmp = page_chain_next(page);
9045bb912bSLars Ellenberg if (--n == 0)
9145bb912bSLars Ellenberg break; /* found sufficient pages */
9245bb912bSLars Ellenberg if (tmp == NULL)
9345bb912bSLars Ellenberg /* insufficient pages, don't use any of them. */
9445bb912bSLars Ellenberg return NULL;
9545bb912bSLars Ellenberg page = tmp;
9645bb912bSLars Ellenberg }
9745bb912bSLars Ellenberg
9845bb912bSLars Ellenberg /* add end of list marker for the returned list */
9945bb912bSLars Ellenberg set_page_private(page, 0);
10045bb912bSLars Ellenberg /* actual return value, and adjustment of head */
10145bb912bSLars Ellenberg page = *head;
10245bb912bSLars Ellenberg *head = tmp;
10345bb912bSLars Ellenberg return page;
10445bb912bSLars Ellenberg }
10545bb912bSLars Ellenberg
10645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short)
10745bb912bSLars Ellenberg * "private" page chain, before adding it back to a global chain head
10845bb912bSLars Ellenberg * with page_chain_add() under a spinlock. */
/*
 * Walk to the last page of a chain; optionally report the chain length
 * through @len.  May be used outside of locks on a "private" chain.
 */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
12145bb912bSLars Ellenberg
page_chain_free(struct page * page)12245bb912bSLars Ellenberg static int page_chain_free(struct page *page)
12345bb912bSLars Ellenberg {
12445bb912bSLars Ellenberg struct page *tmp;
12545bb912bSLars Ellenberg int i = 0;
12645bb912bSLars Ellenberg page_chain_for_each_safe(page, tmp) {
12745bb912bSLars Ellenberg put_page(page);
12845bb912bSLars Ellenberg ++i;
12945bb912bSLars Ellenberg }
13045bb912bSLars Ellenberg return i;
13145bb912bSLars Ellenberg }
13245bb912bSLars Ellenberg
page_chain_add(struct page ** head,struct page * chain_first,struct page * chain_last)13345bb912bSLars Ellenberg static void page_chain_add(struct page **head,
13445bb912bSLars Ellenberg struct page *chain_first, struct page *chain_last)
13545bb912bSLars Ellenberg {
13645bb912bSLars Ellenberg #if 1
13745bb912bSLars Ellenberg struct page *tmp;
13845bb912bSLars Ellenberg tmp = page_chain_tail(chain_first, NULL);
13945bb912bSLars Ellenberg BUG_ON(tmp != chain_last);
14045bb912bSLars Ellenberg #endif
14145bb912bSLars Ellenberg
14245bb912bSLars Ellenberg /* add chain to head */
14345bb912bSLars Ellenberg set_page_private(chain_last, (unsigned long)*head);
14445bb912bSLars Ellenberg *head = chain_first;
14545bb912bSLars Ellenberg }
14645bb912bSLars Ellenberg
/*
 * __drbd_alloc_pages() - single, non-retrying attempt to get @number pages.
 *
 * First tries to take a ready chain of @number pages off the global
 * drbd_pp_pool; failing that, allocates pages one by one with GFP_TRY.
 * Returns the chain (linked via page->private) on success, NULL otherwise.
 * On partial allocation the pages obtained so far are donated to the
 * global pool rather than freed, since drbd_alloc_pages() will retry.
 */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* prepend to the chain being built */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
192b411b363SPhilipp Reisner
/*
 * Move peer requests from device->net_ee whose pages are no longer
 * referenced by the network stack onto @to_be_freed.
 * NOTE(review): both callers in this file hold resource->req_lock around
 * this call — presumably a requirement; confirm before adding callers.
 */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
		struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
209b411b363SPhilipp Reisner
/*
 * Collect finished net_ee peer requests of one device and free them.
 * The freeing happens outside req_lock because __drbd_free_peer_req()
 * may sleep (it does might_sleep()).
 */
static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
221b411b363SPhilipp Reisner
/*
 * Reclaim finished net peer requests on every volume of @connection.
 * Walks the peer_devices idr under RCU, but drops the RCU read lock
 * around the actual reclaim, since freeing may sleep; a kref pins the
 * device across that window.
 */
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* fast path: skip volumes with nothing lent to the net stack */
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		/* pin the device, then leave the RCU section: reclaiming
		 * takes req_lock and the free path may sleep */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
241668700b4SPhilipp Reisner
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device: DRBD device.
 * @number: number of pages requested
 * @retry: whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* slept the full 100ms without a wakeup: stop throttling
		 * against max_buffers for this allocation attempt */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
312b411b363SPhilipp Reisner
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pages lent to the network stack are accounted separately from
	 * ordinary peer-request pages */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* keep the global pool bounded: above the high-water mark, hand
	 * the pages back to the system instead of caching them */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	/* going negative means an accounting bug somewhere — warn loudly */
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* allocators may be throttled in drbd_alloc_pages(); wake them */
	wake_up(&drbd_pp_wait);
}
341b411b363SPhilipp Reisner
342b411b363SPhilipp Reisner /*
343b411b363SPhilipp Reisner You need to hold the req_lock:
344b411b363SPhilipp Reisner _drbd_wait_ee_list_empty()
345b411b363SPhilipp Reisner
346b411b363SPhilipp Reisner You must not have the req_lock:
3473967deb1SAndreas Gruenbacher drbd_free_peer_req()
3480db55363SAndreas Gruenbacher drbd_alloc_peer_req()
3497721f567SAndreas Gruenbacher drbd_free_peer_reqs()
350b411b363SPhilipp Reisner drbd_ee_fix_bhs()
351a990be46SAndreas Gruenbacher drbd_finish_peer_reqs()
352b411b363SPhilipp Reisner drbd_clear_done_ee()
353b411b363SPhilipp Reisner drbd_wait_ee_list_empty()
354b411b363SPhilipp Reisner */
355b411b363SPhilipp Reisner
/* normal: payload_size == request size (bi_size)
 * w_same: payload_size == logical_block_size
 * trim: payload_size == 0
 *
 * Allocate a peer request object plus @payload_size worth of pages.
 * Returns NULL on allocation failure (or injected fault); on success the
 * caller owns the object and must release it via drbd_free_peer_req(). */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned int nr_pages = PFN_UP(payload_size);

	/* fault injection hook to exercise the failure path in testing */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	/* mempool pages must not be highmem; mask the flag off */
	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	/* trim requests carry no payload, so nr_pages may be 0 */
	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

fail:
	mempool_free(peer_req, &drbd_ee_mempool);
	return NULL;
}
405b411b363SPhilipp Reisner
/*
 * Release one peer request: its digest (if any), its page chain, and
 * finally the object itself.  May sleep, so never call under a spinlock.
 */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* normally AL completion already happened; if not, do it now so the
	 * activity-log reference is not leaked */
	if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, &drbd_ee_mempool);
}
421b411b363SPhilipp Reisner
/*
 * Free every peer request on @list; returns how many were freed.
 * The list is spliced to a private head under req_lock first, so the
 * (possibly sleeping) free runs without the lock held.
 */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries are accounted against pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
439b411b363SPhilipp Reisner
/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 *
 * Run the completion callback of every request on device->done_ee and
 * free it; also reclaims finished net_ee requests in the same lock hold.
 * Returns 0, or the first non-zero callback error (later callbacks still
 * run, but see the !!err passed to them below).
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	/* waiters in _drbd_wait_ee_list_empty() may now make progress */
	wake_up(&device->ee_wait);

	return err;
}
475b411b363SPhilipp Reisner
/*
 * Wait (uninterruptibly) until @head becomes empty.
 * Caller must hold resource->req_lock; the lock is dropped around the
 * actual sleep so the completion side can remove entries, and re-taken
 * before returning and before each re-check.
 */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
		struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
491b411b363SPhilipp Reisner
/* Locked wrapper around _drbd_wait_ee_list_empty() for callers that do
 * not already hold resource->req_lock. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
		struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
499b411b363SPhilipp Reisner
/*
 * Receive up to @size bytes from @sock into @buf.
 * With @flags == 0 this blocks until the full @size arrived (MSG_WAITALL)
 * and suppresses SIGPIPE (MSG_NOSIGNAL); non-zero @flags are used as-is.
 * Returns the sock_recvmsg() result.
 */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = { };

	msg.msg_flags = flags ? flags : MSG_WAITALL | MSG_NOSIGNAL;
	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size);
	return sock_recvmsg(sock, &msg, msg.msg_flags);
}
512b411b363SPhilipp Reisner
/*
 * Receive @size bytes on the connection's data socket.
 * Logs errors/peer shutdowns and forces the connection state to
 * C_BROKEN_PIPE on any short read.  Returns the raw recv result.
 */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* EOF: if we initiated the disconnect ourselves, give the
		 * state machine ping_timeo to leave C_WF_REPORT_PARAMS
		 * before complaining. */
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
545b411b363SPhilipp Reisner
/*
 * Like drbd_recv(), but normalize the result: 0 on a complete read,
 * the negative error unchanged, or -EIO on any short-but-positive read.
 */
static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int ret = drbd_recv(connection, buf, size);

	if (ret == size)
		return 0;
	return ret < 0 ? ret : -EIO;
}
558c6967746SAndreas Gruenbacher
/*
 * drbd_recv_all() plus a warning on failure — unless a pending signal
 * is the likely cause of the short read, in which case stay quiet.
 */
static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int ret = drbd_recv_all(connection, buf, size);

	if (ret && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return ret;
}
568a5c31904SAndreas Gruenbacher
5695dbf1673SLars Ellenberg /* quoting tcp(7):
5705dbf1673SLars Ellenberg * On individual connections, the socket buffer size must be set prior to the
5715dbf1673SLars Ellenberg * listen(2) or connect(2) calls in order to have it take effect.
5725dbf1673SLars Ellenberg * This is our wrapper to do so.
5735dbf1673SLars Ellenberg */
drbd_setbufsize(struct socket * sock,unsigned int snd,unsigned int rcv)5745dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd,
5755dbf1673SLars Ellenberg unsigned int rcv)
5765dbf1673SLars Ellenberg {
5775dbf1673SLars Ellenberg /* open coded SO_SNDBUF, SO_RCVBUF */
5785dbf1673SLars Ellenberg if (snd) {
5795dbf1673SLars Ellenberg sock->sk->sk_sndbuf = snd;
5805dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
5815dbf1673SLars Ellenberg }
5825dbf1673SLars Ellenberg if (rcv) {
5835dbf1673SLars Ellenberg sock->sk->sk_rcvbuf = rcv;
5845dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
5855dbf1673SLars Ellenberg }
5865dbf1673SLars Ellenberg }
5875dbf1673SLars Ellenberg
/*
 * drbd_try_connect() - Attempt one outgoing TCP connection to the peer
 * @connection:	connection whose configured local/peer addresses are used.
 *
 * Creates a TCP socket, binds it to the configured local address with
 * port 0 (so the kernel picks a free source port -- needed for multihomed
 * hosts and lo: interfaces), then connects to the configured peer address.
 *
 * Returns the socket on success, NULL on failure.  "Expected" failures
 * (timeout, peer not reachable yet, pending signal) leave the connection
 * state alone so the caller's connect loop can retry; any other error
 * forces C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the tunables under RCU; net_conf may be replaced or
	 * disappear concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* Clamp against the destination buffer peer_in6 (this used to say
	 * sizeof(src_in6) -- same size, but the wrong object). */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	/* buffer sizes must be set before connect(), see tcp(7) */
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
675b411b363SPhilipp Reisner
/* State shared between the listen socket's sk_state_change callback and the
 * task waiting in drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket */
	struct completion door_bell;	/* completed when a connection reaches TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* chained/restored callback */

};
6837a426fd8SPhilipp Reisner
/* sk_state_change callback installed on the listening socket: ring the door
 * bell once an incoming connection is fully established, then chain to the
 * original callback. */
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	/* Load the chained callback before complete(): once the waiter wakes
	 * it calls unregister_state_change(), which clears sk_user_data --
	 * presumably this ordering avoids touching ad afterwards;
	 * NOTE(review): confirm against drbd_wait_for_connect(). */
	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}
6947a426fd8SPhilipp Reisner
/*
 * prepare_listen_socket() - Create, hook up and start the listening socket
 * @connection:	connection whose configured local address we bind to.
 * @ad:		accept state; on success ad->s_listen holds the new socket
 *		and its sk_state_change is redirected to drbd_incoming_connection().
 *
 * Returns 0 on success, -EIO on any failure.  Unexpected errors additionally
 * force the connection to C_DISCONNECTING.
 */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;	/* names the failing step for the error message */

	/* Snapshot buffer sizes under RCU; net_conf may vanish concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	/* buffer sizes must be set before listen(), see tcp(7) */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Redirect the state-change callback under sk_callback_lock so an
	 * incoming connection rings ad->door_bell. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* -EAGAIN/-EINTR/-ERESTARTSYS are expected; retry silently. */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
7571f3e509bSPhilipp Reisner
unregister_state_change(struct sock * sk,struct accept_wait_data * ad)758715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
759715306f6SAndreas Gruenbacher {
760715306f6SAndreas Gruenbacher write_lock_bh(&sk->sk_callback_lock);
761715306f6SAndreas Gruenbacher sk->sk_state_change = ad->original_sk_state_change;
762715306f6SAndreas Gruenbacher sk->sk_user_data = NULL;
763715306f6SAndreas Gruenbacher write_unlock_bh(&sk->sk_callback_lock);
764715306f6SAndreas Gruenbacher }
765715306f6SAndreas Gruenbacher
drbd_wait_for_connect(struct drbd_connection * connection,struct accept_wait_data * ad)766bde89a9eSAndreas Gruenbacher static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
7671f3e509bSPhilipp Reisner {
7681f3e509bSPhilipp Reisner int timeo, connect_int, err = 0;
7691f3e509bSPhilipp Reisner struct socket *s_estab = NULL;
7701f3e509bSPhilipp Reisner struct net_conf *nc;
7711f3e509bSPhilipp Reisner
7721f3e509bSPhilipp Reisner rcu_read_lock();
773bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf);
7741f3e509bSPhilipp Reisner if (!nc) {
7751f3e509bSPhilipp Reisner rcu_read_unlock();
7761f3e509bSPhilipp Reisner return NULL;
7771f3e509bSPhilipp Reisner }
7781f3e509bSPhilipp Reisner connect_int = nc->connect_int;
7791f3e509bSPhilipp Reisner rcu_read_unlock();
7801f3e509bSPhilipp Reisner
7811f3e509bSPhilipp Reisner timeo = connect_int * HZ;
78238b682b2SAkinobu Mita /* 28.5% random jitter */
7838032bf12SJason A. Donenfeld timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
7841f3e509bSPhilipp Reisner
7857a426fd8SPhilipp Reisner err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
7867a426fd8SPhilipp Reisner if (err <= 0)
7877a426fd8SPhilipp Reisner return NULL;
7881f3e509bSPhilipp Reisner
7897a426fd8SPhilipp Reisner err = kernel_accept(ad->s_listen, &s_estab, 0);
790b411b363SPhilipp Reisner if (err < 0) {
791b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7921ec861ebSAndreas Gruenbacher drbd_err(connection, "accept failed, err = %d\n", err);
793bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
794b411b363SPhilipp Reisner }
795b411b363SPhilipp Reisner }
796b411b363SPhilipp Reisner
797715306f6SAndreas Gruenbacher if (s_estab)
798715306f6SAndreas Gruenbacher unregister_state_change(s_estab->sk, ad);
799b411b363SPhilipp Reisner
800b411b363SPhilipp Reisner return s_estab;
801b411b363SPhilipp Reisner }
802b411b363SPhilipp Reisner
803bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *);
804b411b363SPhilipp Reisner
send_first_packet(struct drbd_connection * connection,struct drbd_socket * sock,enum drbd_packet cmd)805bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
8069f5bdc33SAndreas Gruenbacher enum drbd_packet cmd)
8079f5bdc33SAndreas Gruenbacher {
808bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock))
8099f5bdc33SAndreas Gruenbacher return -EIO;
810bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, cmd, 0, NULL, 0);
811b411b363SPhilipp Reisner }
812b411b363SPhilipp Reisner
receive_first_packet(struct drbd_connection * connection,struct socket * sock)813bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
814b411b363SPhilipp Reisner {
815bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection);
8169f5bdc33SAndreas Gruenbacher struct packet_info pi;
8174920e37aSPhilipp Reisner struct net_conf *nc;
8189f5bdc33SAndreas Gruenbacher int err;
819b411b363SPhilipp Reisner
8204920e37aSPhilipp Reisner rcu_read_lock();
8214920e37aSPhilipp Reisner nc = rcu_dereference(connection->net_conf);
8224920e37aSPhilipp Reisner if (!nc) {
8234920e37aSPhilipp Reisner rcu_read_unlock();
8244920e37aSPhilipp Reisner return -EIO;
8254920e37aSPhilipp Reisner }
8264920e37aSPhilipp Reisner sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
8274920e37aSPhilipp Reisner rcu_read_unlock();
8284920e37aSPhilipp Reisner
829bde89a9eSAndreas Gruenbacher err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
8309f5bdc33SAndreas Gruenbacher if (err != header_size) {
8319f5bdc33SAndreas Gruenbacher if (err >= 0)
8329f5bdc33SAndreas Gruenbacher err = -EIO;
8339f5bdc33SAndreas Gruenbacher return err;
8349f5bdc33SAndreas Gruenbacher }
835bde89a9eSAndreas Gruenbacher err = decode_header(connection, connection->data.rbuf, &pi);
8369f5bdc33SAndreas Gruenbacher if (err)
8379f5bdc33SAndreas Gruenbacher return err;
8389f5bdc33SAndreas Gruenbacher return pi.cmd;
839b411b363SPhilipp Reisner }
840b411b363SPhilipp Reisner
841b411b363SPhilipp Reisner /**
842b411b363SPhilipp Reisner * drbd_socket_okay() - Free the socket if its connection is not okay
843b411b363SPhilipp Reisner * @sock: pointer to the pointer to the socket.
844b411b363SPhilipp Reisner */
drbd_socket_okay(struct socket ** sock)8455d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock)
846b411b363SPhilipp Reisner {
847b411b363SPhilipp Reisner int rr;
848b411b363SPhilipp Reisner char tb[4];
849b411b363SPhilipp Reisner
850b411b363SPhilipp Reisner if (!*sock)
85181e84650SAndreas Gruenbacher return false;
852b411b363SPhilipp Reisner
853dbd9eea0SPhilipp Reisner rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
854b411b363SPhilipp Reisner
855b411b363SPhilipp Reisner if (rr > 0 || rr == -EAGAIN) {
85681e84650SAndreas Gruenbacher return true;
857b411b363SPhilipp Reisner } else {
858b411b363SPhilipp Reisner sock_release(*sock);
859b411b363SPhilipp Reisner *sock = NULL;
86081e84650SAndreas Gruenbacher return false;
861b411b363SPhilipp Reisner }
862b411b363SPhilipp Reisner }
8635d0b17f1SPhilipp Reisner
connection_established(struct drbd_connection * connection,struct socket ** sock1,struct socket ** sock2)8645d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection,
8655d0b17f1SPhilipp Reisner struct socket **sock1,
8665d0b17f1SPhilipp Reisner struct socket **sock2)
8675d0b17f1SPhilipp Reisner {
8685d0b17f1SPhilipp Reisner struct net_conf *nc;
8695d0b17f1SPhilipp Reisner int timeout;
8705d0b17f1SPhilipp Reisner bool ok;
8715d0b17f1SPhilipp Reisner
8725d0b17f1SPhilipp Reisner if (!*sock1 || !*sock2)
8735d0b17f1SPhilipp Reisner return false;
8745d0b17f1SPhilipp Reisner
8755d0b17f1SPhilipp Reisner rcu_read_lock();
8765d0b17f1SPhilipp Reisner nc = rcu_dereference(connection->net_conf);
8775d0b17f1SPhilipp Reisner timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
8785d0b17f1SPhilipp Reisner rcu_read_unlock();
8795d0b17f1SPhilipp Reisner schedule_timeout_interruptible(timeout);
8805d0b17f1SPhilipp Reisner
8815d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock1);
8825d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock2) && ok;
8835d0b17f1SPhilipp Reisner
8845d0b17f1SPhilipp Reisner return ok;
8855d0b17f1SPhilipp Reisner }
8865d0b17f1SPhilipp Reisner
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.  Sends the initial per-device handshake (sync params,
   sizes, UUIDs, current state) and resets per-device connection state.
   Returns 0 on success or the first error from the send chain. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* restart the data-packet sequence numbering for this peer */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Peers older than protocol 100 share one connection-wide state
	 * mutex; newer peers get a per-device one. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* Send chain stops at the first error, but the flag/timer resets
	 * below run unconditionally. */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
914b411b363SPhilipp Reisner
/*
 * conn_connect() - Establish both TCP channels (data + meta) to the peer
 *
 * Both nodes simultaneously try to connect out and accept incoming
 * connections; the first established socket becomes the data channel,
 * the second the meta channel.  Crossed initial packets are resolved
 * inside the loop.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *	  no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* try the outgoing direction first */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* then wait for the peer to connect to us */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			/* drop our sockets if they died in the meantime */
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; keep
					 * the incoming one */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break the tie randomly: one side retries the
				 * accept immediately, the other falls through */
				if (get_random_u32_below(2))
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* listening socket no longer needed once both channels exist */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_use_task_frag = false;
	msock.socket->sk->sk_use_task_frag = false;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	tcp_sock_set_nodelay(sock.socket->sk);
	tcp_sock_set_nodelay(msock.socket->sk);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
	spin_lock_irq(&connection->resource->req_lock);
	set_bit(STATE_SENT, &connection->flags);
	spin_unlock_irq(&connection->resource->req_lock);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-device handshake; drop RCU around the (sleeping) drbd_connected
	 * call, keeping the device alive via its kref */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1157b411b363SPhilipp Reisner
decode_header(struct drbd_connection * connection,void * header,struct packet_info * pi)1158bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
1159b411b363SPhilipp Reisner {
1160bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection);
1161b411b363SPhilipp Reisner
11620c8e36d9SAndreas Gruenbacher if (header_size == sizeof(struct p_header100) &&
11630c8e36d9SAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
11640c8e36d9SAndreas Gruenbacher struct p_header100 *h = header;
11650c8e36d9SAndreas Gruenbacher if (h->pad != 0) {
11661ec861ebSAndreas Gruenbacher drbd_err(connection, "Header padding is not zero\n");
11670c8e36d9SAndreas Gruenbacher return -EINVAL;
116802918be2SPhilipp Reisner }
11690c8e36d9SAndreas Gruenbacher pi->vnr = be16_to_cpu(h->volume);
11700c8e36d9SAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command);
11710c8e36d9SAndreas Gruenbacher pi->size = be32_to_cpu(h->length);
11720c8e36d9SAndreas Gruenbacher } else if (header_size == sizeof(struct p_header95) &&
1173e658983aSAndreas Gruenbacher *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
1174e658983aSAndreas Gruenbacher struct p_header95 *h = header;
1175e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command);
1176b55d84baSAndreas Gruenbacher pi->size = be32_to_cpu(h->length);
1177eefc2f7dSPhilipp Reisner pi->vnr = 0;
1178e658983aSAndreas Gruenbacher } else if (header_size == sizeof(struct p_header80) &&
1179e658983aSAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1180e658983aSAndreas Gruenbacher struct p_header80 *h = header;
1181e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command);
1182e658983aSAndreas Gruenbacher pi->size = be16_to_cpu(h->length);
118377351055SPhilipp Reisner pi->vnr = 0;
118402918be2SPhilipp Reisner } else {
11851ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
1186e658983aSAndreas Gruenbacher be32_to_cpu(*(__be32 *)header),
1187bde89a9eSAndreas Gruenbacher connection->agreed_pro_version);
11888172f3e9SAndreas Gruenbacher return -EINVAL;
1189b411b363SPhilipp Reisner }
1190e658983aSAndreas Gruenbacher pi->data = header + header_size;
11918172f3e9SAndreas Gruenbacher return 0;
1192b411b363SPhilipp Reisner }
1193b411b363SPhilipp Reisner
drbd_unplug_all_devices(struct drbd_connection * connection)1194c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection)
1195c51a0ef3SLars Ellenberg {
1196c51a0ef3SLars Ellenberg if (current->plug == &connection->receiver_plug) {
1197c51a0ef3SLars Ellenberg blk_finish_plug(&connection->receiver_plug);
1198c51a0ef3SLars Ellenberg blk_start_plug(&connection->receiver_plug);
1199c51a0ef3SLars Ellenberg } /* else: maybe just schedule() ?? */
1200c51a0ef3SLars Ellenberg }
1201c51a0ef3SLars Ellenberg
drbd_recv_header(struct drbd_connection * connection,struct packet_info * pi)1202bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
1203257d0af6SPhilipp Reisner {
1204bde89a9eSAndreas Gruenbacher void *buffer = connection->data.rbuf;
120569bc7bc3SAndreas Gruenbacher int err;
1206257d0af6SPhilipp Reisner
1207bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
1208a5c31904SAndreas Gruenbacher if (err)
120969bc7bc3SAndreas Gruenbacher return err;
1210257d0af6SPhilipp Reisner
1211bde89a9eSAndreas Gruenbacher err = decode_header(connection, buffer, pi);
1212bde89a9eSAndreas Gruenbacher connection->last_received = jiffies;
1213b411b363SPhilipp Reisner
121469bc7bc3SAndreas Gruenbacher return err;
1215b411b363SPhilipp Reisner }
1216b411b363SPhilipp Reisner
/* Like drbd_recv_header(), but first attempt a single non-blocking read.
 * If that would block (-EAGAIN), the socket is momentarily idle: use the
 * opportunity to unplug the backend queues and ack received data early,
 * then fall back to a blocking receive for the (remaining) header bytes. */
static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	unsigned int size = drbd_header_size(connection);
	int err;

	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
	if (err != size) {
		/* If we have nothing in the receive buffer now, to reduce
		 * application latency, try to drain the backend queues as
		 * quickly as possible, and let remote TCP know what we have
		 * received so far. */
		if (err == -EAGAIN) {
			tcp_sock_set_quickack(connection->data.socket->sk, 2);
			drbd_unplug_all_devices(connection);
		}
		/* Partial read: advance past what we already have and
		 * block for the rest. */
		if (err > 0) {
			buffer += err;
			size -= err;
		}
		err = drbd_recv_all_warn(connection, buffer, size);
		if (err)
			return err;
	}

	err = decode_header(connection, connection->data.rbuf, pi);
	connection->last_received = jiffies;

	return err;
}
1247f9ff0da5SLars Ellenberg /* This is blkdev_issue_flush, but asynchronous.
1248f9ff0da5SLars Ellenberg * We want to submit to all component volumes in parallel,
1249f9ff0da5SLars Ellenberg * then wait for all completions.
1250f9ff0da5SLars Ellenberg */
1251f9ff0da5SLars Ellenberg struct issue_flush_context {
1252f9ff0da5SLars Ellenberg atomic_t pending;
1253f9ff0da5SLars Ellenberg int error;
1254f9ff0da5SLars Ellenberg struct completion done;
1255f9ff0da5SLars Ellenberg };
1256f9ff0da5SLars Ellenberg struct one_flush_context {
1257f9ff0da5SLars Ellenberg struct drbd_device *device;
1258f9ff0da5SLars Ellenberg struct issue_flush_context *ctx;
1259f9ff0da5SLars Ellenberg };
1260f9ff0da5SLars Ellenberg
one_flush_endio(struct bio * bio)12611ffa7bfaSBaoyou Xie static void one_flush_endio(struct bio *bio)
1262f9ff0da5SLars Ellenberg {
1263f9ff0da5SLars Ellenberg struct one_flush_context *octx = bio->bi_private;
1264f9ff0da5SLars Ellenberg struct drbd_device *device = octx->device;
1265f9ff0da5SLars Ellenberg struct issue_flush_context *ctx = octx->ctx;
1266f9ff0da5SLars Ellenberg
12674e4cbee9SChristoph Hellwig if (bio->bi_status) {
12684e4cbee9SChristoph Hellwig ctx->error = blk_status_to_errno(bio->bi_status);
12694e4cbee9SChristoph Hellwig drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1270f9ff0da5SLars Ellenberg }
1271f9ff0da5SLars Ellenberg kfree(octx);
1272f9ff0da5SLars Ellenberg bio_put(bio);
1273f9ff0da5SLars Ellenberg
1274f9ff0da5SLars Ellenberg clear_bit(FLUSH_PENDING, &device->flags);
1275f9ff0da5SLars Ellenberg put_ldev(device);
1276f9ff0da5SLars Ellenberg kref_put(&device->kref, drbd_destroy_device);
1277f9ff0da5SLars Ellenberg
1278f9ff0da5SLars Ellenberg if (atomic_dec_and_test(&ctx->pending))
1279f9ff0da5SLars Ellenberg complete(&ctx->done);
1280f9ff0da5SLars Ellenberg }
1281f9ff0da5SLars Ellenberg
submit_one_flush(struct drbd_device * device,struct issue_flush_context * ctx)1282f9ff0da5SLars Ellenberg static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
1283f9ff0da5SLars Ellenberg {
128407888c66SChristoph Hellwig struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
1285*3899d94eSChristoph Böhmwalder REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO);
1286f9ff0da5SLars Ellenberg struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
12874b1dc86dSChristoph Hellwig
12884b1dc86dSChristoph Hellwig if (!octx) {
12894b1dc86dSChristoph Hellwig drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
1290f9ff0da5SLars Ellenberg /* FIXME: what else can I do now? disconnecting or detaching
1291f9ff0da5SLars Ellenberg * really does not help to improve the state of the world, either.
1292f9ff0da5SLars Ellenberg */
1293f9ff0da5SLars Ellenberg bio_put(bio);
1294f9ff0da5SLars Ellenberg
1295f9ff0da5SLars Ellenberg ctx->error = -ENOMEM;
1296f9ff0da5SLars Ellenberg put_ldev(device);
1297f9ff0da5SLars Ellenberg kref_put(&device->kref, drbd_destroy_device);
1298f9ff0da5SLars Ellenberg return;
1299f9ff0da5SLars Ellenberg }
1300f9ff0da5SLars Ellenberg
1301f9ff0da5SLars Ellenberg octx->device = device;
1302f9ff0da5SLars Ellenberg octx->ctx = ctx;
1303f9ff0da5SLars Ellenberg bio->bi_private = octx;
1304f9ff0da5SLars Ellenberg bio->bi_end_io = one_flush_endio;
1305f9ff0da5SLars Ellenberg
1306f9ff0da5SLars Ellenberg device->flush_jif = jiffies;
1307f9ff0da5SLars Ellenberg set_bit(FLUSH_PENDING, &device->flags);
1308f9ff0da5SLars Ellenberg atomic_inc(&ctx->pending);
1309f9ff0da5SLars Ellenberg submit_bio(bio);
1310f9ff0da5SLars Ellenberg }
1311f9ff0da5SLars Ellenberg
drbd_flush(struct drbd_connection * connection)1312bde89a9eSAndreas Gruenbacher static void drbd_flush(struct drbd_connection *connection)
1313b411b363SPhilipp Reisner {
1314f9ff0da5SLars Ellenberg if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
1315c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device;
1316f9ff0da5SLars Ellenberg struct issue_flush_context ctx;
13174b0007c0SPhilipp Reisner int vnr;
1318b411b363SPhilipp Reisner
1319f9ff0da5SLars Ellenberg atomic_set(&ctx.pending, 1);
1320f9ff0da5SLars Ellenberg ctx.error = 0;
1321f9ff0da5SLars Ellenberg init_completion(&ctx.done);
1322f9ff0da5SLars Ellenberg
1323615e087fSLars Ellenberg rcu_read_lock();
1324c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1325c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device;
1326c06ece6bSAndreas Gruenbacher
1327b30ab791SAndreas Gruenbacher if (!get_ldev(device))
1328615e087fSLars Ellenberg continue;
1329b30ab791SAndreas Gruenbacher kref_get(&device->kref);
1330615e087fSLars Ellenberg rcu_read_unlock();
13314b0007c0SPhilipp Reisner
1332f9ff0da5SLars Ellenberg submit_one_flush(device, &ctx);
1333f9ff0da5SLars Ellenberg
1334f9ff0da5SLars Ellenberg rcu_read_lock();
1335f9ff0da5SLars Ellenberg }
1336f9ff0da5SLars Ellenberg rcu_read_unlock();
1337f9ff0da5SLars Ellenberg
1338f9ff0da5SLars Ellenberg /* Do we want to add a timeout,
1339f9ff0da5SLars Ellenberg * if disk-timeout is set? */
1340f9ff0da5SLars Ellenberg if (!atomic_dec_and_test(&ctx.pending))
1341f9ff0da5SLars Ellenberg wait_for_completion(&ctx.done);
1342f9ff0da5SLars Ellenberg
1343f9ff0da5SLars Ellenberg if (ctx.error) {
1344b411b363SPhilipp Reisner /* would rather check on EOPNOTSUPP, but that is not reliable.
1345b411b363SPhilipp Reisner * don't try again for ANY return value != 0
1346b411b363SPhilipp Reisner * if (rv == -EOPNOTSUPP) */
1347f9ff0da5SLars Ellenberg /* Any error is already reported by bio_endio callback. */
1348f6ba8636SAndreas Gruenbacher drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1349b411b363SPhilipp Reisner }
1350b411b363SPhilipp Reisner }
1351b411b363SPhilipp Reisner }
1352b411b363SPhilipp Reisner
1353b411b363SPhilipp Reisner /**
1354b411b363SPhilipp Reisner * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
13559b48ff07SLee Jones * @connection: DRBD connection.
1356b411b363SPhilipp Reisner * @epoch: Epoch object.
1357b411b363SPhilipp Reisner * @ev: Epoch event.
1358b411b363SPhilipp Reisner */
drbd_may_finish_epoch(struct drbd_connection * connection,struct drbd_epoch * epoch,enum epoch_event ev)1359bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
1360b411b363SPhilipp Reisner struct drbd_epoch *epoch,
1361b411b363SPhilipp Reisner enum epoch_event ev)
1362b411b363SPhilipp Reisner {
13632451fc3bSPhilipp Reisner int epoch_size;
1364b411b363SPhilipp Reisner struct drbd_epoch *next_epoch;
1365b411b363SPhilipp Reisner enum finish_epoch rv = FE_STILL_LIVE;
1366b411b363SPhilipp Reisner
1367bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock);
1368b411b363SPhilipp Reisner do {
1369b411b363SPhilipp Reisner next_epoch = NULL;
1370b411b363SPhilipp Reisner
1371b411b363SPhilipp Reisner epoch_size = atomic_read(&epoch->epoch_size);
1372b411b363SPhilipp Reisner
1373b411b363SPhilipp Reisner switch (ev & ~EV_CLEANUP) {
1374b411b363SPhilipp Reisner case EV_PUT:
1375b411b363SPhilipp Reisner atomic_dec(&epoch->active);
1376b411b363SPhilipp Reisner break;
1377b411b363SPhilipp Reisner case EV_GOT_BARRIER_NR:
1378b411b363SPhilipp Reisner set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
1379b411b363SPhilipp Reisner break;
1380b411b363SPhilipp Reisner case EV_BECAME_LAST:
1381b411b363SPhilipp Reisner /* nothing to do*/
1382b411b363SPhilipp Reisner break;
1383b411b363SPhilipp Reisner }
1384b411b363SPhilipp Reisner
1385b411b363SPhilipp Reisner if (epoch_size != 0 &&
1386b411b363SPhilipp Reisner atomic_read(&epoch->active) == 0 &&
138780f9fd55SPhilipp Reisner (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
1388b411b363SPhilipp Reisner if (!(ev & EV_CLEANUP)) {
1389bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock);
1390bde89a9eSAndreas Gruenbacher drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1391bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock);
1392b411b363SPhilipp Reisner }
13939ed57dcbSLars Ellenberg #if 0
13949ed57dcbSLars Ellenberg /* FIXME: dec unacked on connection, once we have
13959ed57dcbSLars Ellenberg * something to count pending connection packets in. */
139680f9fd55SPhilipp Reisner if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
1397bde89a9eSAndreas Gruenbacher dec_unacked(epoch->connection);
13989ed57dcbSLars Ellenberg #endif
1399b411b363SPhilipp Reisner
1400bde89a9eSAndreas Gruenbacher if (connection->current_epoch != epoch) {
1401b411b363SPhilipp Reisner next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1402b411b363SPhilipp Reisner list_del(&epoch->list);
1403b411b363SPhilipp Reisner ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1404bde89a9eSAndreas Gruenbacher connection->epochs--;
1405b411b363SPhilipp Reisner kfree(epoch);
1406b411b363SPhilipp Reisner
1407b411b363SPhilipp Reisner if (rv == FE_STILL_LIVE)
1408b411b363SPhilipp Reisner rv = FE_DESTROYED;
1409b411b363SPhilipp Reisner } else {
1410b411b363SPhilipp Reisner epoch->flags = 0;
1411b411b363SPhilipp Reisner atomic_set(&epoch->epoch_size, 0);
1412698f9315SUwe Kleine-König /* atomic_set(&epoch->active, 0); is already zero */
1413b411b363SPhilipp Reisner if (rv == FE_STILL_LIVE)
1414b411b363SPhilipp Reisner rv = FE_RECYCLED;
1415b411b363SPhilipp Reisner }
1416b411b363SPhilipp Reisner }
1417b411b363SPhilipp Reisner
1418b411b363SPhilipp Reisner if (!next_epoch)
1419b411b363SPhilipp Reisner break;
1420b411b363SPhilipp Reisner
1421b411b363SPhilipp Reisner epoch = next_epoch;
1422b411b363SPhilipp Reisner } while (1);
1423b411b363SPhilipp Reisner
1424bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock);
1425b411b363SPhilipp Reisner
1426b411b363SPhilipp Reisner return rv;
1427b411b363SPhilipp Reisner }
1428b411b363SPhilipp Reisner
14298fe39aacSPhilipp Reisner static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev * bdev,enum write_ordering_e wo)14308fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
14318fe39aacSPhilipp Reisner {
14328fe39aacSPhilipp Reisner struct disk_conf *dc;
14338fe39aacSPhilipp Reisner
14348fe39aacSPhilipp Reisner dc = rcu_dereference(bdev->disk_conf);
14358fe39aacSPhilipp Reisner
1436f6ba8636SAndreas Gruenbacher if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1437f6ba8636SAndreas Gruenbacher wo = WO_DRAIN_IO;
1438f6ba8636SAndreas Gruenbacher if (wo == WO_DRAIN_IO && !dc->disk_drain)
1439f6ba8636SAndreas Gruenbacher wo = WO_NONE;
14408fe39aacSPhilipp Reisner
14418fe39aacSPhilipp Reisner return wo;
14428fe39aacSPhilipp Reisner }
14438fe39aacSPhilipp Reisner
14449b48ff07SLee Jones /*
1445b411b363SPhilipp Reisner * drbd_bump_write_ordering() - Fall back to an other write ordering method
1446b411b363SPhilipp Reisner * @wo: Write ordering method to try.
1447b411b363SPhilipp Reisner */
drbd_bump_write_ordering(struct drbd_resource * resource,struct drbd_backing_dev * bdev,enum write_ordering_e wo)14488fe39aacSPhilipp Reisner void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
14498fe39aacSPhilipp Reisner enum write_ordering_e wo)
1450b411b363SPhilipp Reisner {
1451e9526580SPhilipp Reisner struct drbd_device *device;
1452b411b363SPhilipp Reisner enum write_ordering_e pwo;
14534b0007c0SPhilipp Reisner int vnr;
1454b411b363SPhilipp Reisner static char *write_ordering_str[] = {
1455f6ba8636SAndreas Gruenbacher [WO_NONE] = "none",
1456f6ba8636SAndreas Gruenbacher [WO_DRAIN_IO] = "drain",
1457f6ba8636SAndreas Gruenbacher [WO_BDEV_FLUSH] = "flush",
1458b411b363SPhilipp Reisner };
1459b411b363SPhilipp Reisner
1460e9526580SPhilipp Reisner pwo = resource->write_ordering;
1461f6ba8636SAndreas Gruenbacher if (wo != WO_BDEV_FLUSH)
1462b411b363SPhilipp Reisner wo = min(pwo, wo);
1463daeda1ccSPhilipp Reisner rcu_read_lock();
1464e9526580SPhilipp Reisner idr_for_each_entry(&resource->devices, device, vnr) {
14658fe39aacSPhilipp Reisner if (get_ldev(device)) {
14668fe39aacSPhilipp Reisner wo = max_allowed_wo(device->ldev, wo);
14678fe39aacSPhilipp Reisner if (device->ldev == bdev)
14688fe39aacSPhilipp Reisner bdev = NULL;
1469b30ab791SAndreas Gruenbacher put_ldev(device);
14704b0007c0SPhilipp Reisner }
14718fe39aacSPhilipp Reisner }
14728fe39aacSPhilipp Reisner
14738fe39aacSPhilipp Reisner if (bdev)
14748fe39aacSPhilipp Reisner wo = max_allowed_wo(bdev, wo);
14758fe39aacSPhilipp Reisner
147670df7092SLars Ellenberg rcu_read_unlock();
147770df7092SLars Ellenberg
1478e9526580SPhilipp Reisner resource->write_ordering = wo;
1479f6ba8636SAndreas Gruenbacher if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
1480e9526580SPhilipp Reisner drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
1481b411b363SPhilipp Reisner }
1482b411b363SPhilipp Reisner
1483f31e583aSLars Ellenberg /*
1484f31e583aSLars Ellenberg * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
1485f31e583aSLars Ellenberg * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
1486f31e583aSLars Ellenberg * will directly go to fallback mode, submitting normal writes, and
1487f31e583aSLars Ellenberg * never even try to UNMAP.
1488f31e583aSLars Ellenberg *
1489f31e583aSLars Ellenberg * And dm-thin does not do this (yet), mostly because in general it has
1490f31e583aSLars Ellenberg * to assume that "skip_block_zeroing" is set. See also:
1491f31e583aSLars Ellenberg * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
1492f31e583aSLars Ellenberg * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
1493f31e583aSLars Ellenberg *
1494f31e583aSLars Ellenberg * We *may* ignore the discard-zeroes-data setting, if so configured.
1495f31e583aSLars Ellenberg *
1496f31e583aSLars Ellenberg * Assumption is that this "discard_zeroes_data=0" is only because the backend
1497f31e583aSLars Ellenberg * may ignore partial unaligned discards.
1498f31e583aSLars Ellenberg *
1499f31e583aSLars Ellenberg * LVM/DM thin as of at least
1500f31e583aSLars Ellenberg * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
1501f31e583aSLars Ellenberg * Library version: 1.02.93-RHEL7 (2015-01-28)
1502f31e583aSLars Ellenberg * Driver version: 4.29.0
1503f31e583aSLars Ellenberg * still behaves this way.
1504f31e583aSLars Ellenberg *
1505f31e583aSLars Ellenberg * For unaligned (wrt. alignment and granularity) or too small discards,
1506f31e583aSLars Ellenberg * we zero-out the initial (and/or) trailing unaligned partial chunks,
1507f31e583aSLars Ellenberg * but discard all the aligned full chunks.
1508f31e583aSLars Ellenberg *
1509f31e583aSLars Ellenberg * At least for LVM/DM thin, with skip_block_zeroing=false,
1510f31e583aSLars Ellenberg * the result is effectively "discard_zeroes_data=1".
1511f31e583aSLars Ellenberg */
1512f31e583aSLars Ellenberg /* flags: EE_TRIM|EE_ZEROOUT */
drbd_issue_discard_or_zero_out(struct drbd_device * device,sector_t start,unsigned int nr_sectors,int flags)1513f31e583aSLars Ellenberg int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
1514dd4f699dSLars Ellenberg {
15150dbed96aSChristoph Hellwig struct block_device *bdev = device->ldev->backing_bdev;
1516f31e583aSLars Ellenberg sector_t tmp, nr;
1517f31e583aSLars Ellenberg unsigned int max_discard_sectors, granularity;
1518f31e583aSLars Ellenberg int alignment;
1519f31e583aSLars Ellenberg int err = 0;
1520dd4f699dSLars Ellenberg
1521f31e583aSLars Ellenberg if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
1522f31e583aSLars Ellenberg goto zero_out;
1523f31e583aSLars Ellenberg
1524f31e583aSLars Ellenberg /* Zero-sector (unknown) and one-sector granularities are the same. */
15257b47ef52SChristoph Hellwig granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
1526f31e583aSLars Ellenberg alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
1527f31e583aSLars Ellenberg
1528cf0fbf89SChristoph Hellwig max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
1529f31e583aSLars Ellenberg max_discard_sectors -= max_discard_sectors % granularity;
1530f31e583aSLars Ellenberg if (unlikely(!max_discard_sectors))
1531f31e583aSLars Ellenberg goto zero_out;
1532f31e583aSLars Ellenberg
1533f31e583aSLars Ellenberg if (nr_sectors < granularity)
1534f31e583aSLars Ellenberg goto zero_out;
1535f31e583aSLars Ellenberg
1536f31e583aSLars Ellenberg tmp = start;
1537f31e583aSLars Ellenberg if (sector_div(tmp, granularity) != alignment) {
1538f31e583aSLars Ellenberg if (nr_sectors < 2*granularity)
1539f31e583aSLars Ellenberg goto zero_out;
1540f31e583aSLars Ellenberg /* start + gran - (start + gran - align) % gran */
1541f31e583aSLars Ellenberg tmp = start + granularity - alignment;
1542f31e583aSLars Ellenberg tmp = start + granularity - sector_div(tmp, granularity);
1543f31e583aSLars Ellenberg
1544f31e583aSLars Ellenberg nr = tmp - start;
1545f31e583aSLars Ellenberg /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
1546f31e583aSLars Ellenberg * layers are below us, some may have smaller granularity */
1547f31e583aSLars Ellenberg err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
1548f31e583aSLars Ellenberg nr_sectors -= nr;
1549f31e583aSLars Ellenberg start = tmp;
1550f31e583aSLars Ellenberg }
1551f31e583aSLars Ellenberg while (nr_sectors >= max_discard_sectors) {
155244abff2cSChristoph Hellwig err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
155344abff2cSChristoph Hellwig GFP_NOIO);
1554f31e583aSLars Ellenberg nr_sectors -= max_discard_sectors;
1555f31e583aSLars Ellenberg start += max_discard_sectors;
1556f31e583aSLars Ellenberg }
1557f31e583aSLars Ellenberg if (nr_sectors) {
1558f31e583aSLars Ellenberg /* max_discard_sectors is unsigned int (and a multiple of
1559f31e583aSLars Ellenberg * granularity, we made sure of that above already);
1560f31e583aSLars Ellenberg * nr is < max_discard_sectors;
1561f31e583aSLars Ellenberg * I don't need sector_div here, even though nr is sector_t */
1562f31e583aSLars Ellenberg nr = nr_sectors;
1563f31e583aSLars Ellenberg nr -= (unsigned int)nr % granularity;
1564f31e583aSLars Ellenberg if (nr) {
156544abff2cSChristoph Hellwig err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
1566f31e583aSLars Ellenberg nr_sectors -= nr;
1567f31e583aSLars Ellenberg start += nr;
1568f31e583aSLars Ellenberg }
1569f31e583aSLars Ellenberg }
1570f31e583aSLars Ellenberg zero_out:
1571f31e583aSLars Ellenberg if (nr_sectors) {
1572f31e583aSLars Ellenberg err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
1573f31e583aSLars Ellenberg (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
1574f31e583aSLars Ellenberg }
1575f31e583aSLars Ellenberg return err != 0;
1576f31e583aSLars Ellenberg }
1577f31e583aSLars Ellenberg
can_do_reliable_discards(struct drbd_device * device)1578f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device)
1579f31e583aSLars Ellenberg {
1580f31e583aSLars Ellenberg struct disk_conf *dc;
1581f31e583aSLars Ellenberg bool can_do;
1582f31e583aSLars Ellenberg
158370200574SChristoph Hellwig if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
1584f31e583aSLars Ellenberg return false;
1585f31e583aSLars Ellenberg
1586f31e583aSLars Ellenberg rcu_read_lock();
1587f31e583aSLars Ellenberg dc = rcu_dereference(device->ldev->disk_conf);
1588f31e583aSLars Ellenberg can_do = dc->discard_zeroes_if_aligned;
1589f31e583aSLars Ellenberg rcu_read_unlock();
1590f31e583aSLars Ellenberg return can_do;
1591f31e583aSLars Ellenberg }
1592f31e583aSLars Ellenberg
drbd_issue_peer_discard_or_zero_out(struct drbd_device * device,struct drbd_peer_request * peer_req)1593f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
1594f31e583aSLars Ellenberg {
1595f31e583aSLars Ellenberg /* If the backend cannot discard, or does not guarantee
1596f31e583aSLars Ellenberg * read-back zeroes in discarded ranges, we fall back to
1597f31e583aSLars Ellenberg * zero-out. Unless configuration specifically requested
1598f31e583aSLars Ellenberg * otherwise. */
1599f31e583aSLars Ellenberg if (!can_do_reliable_discards(device))
1600f31e583aSLars Ellenberg peer_req->flags |= EE_ZEROOUT;
1601f31e583aSLars Ellenberg
1602f31e583aSLars Ellenberg if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1603f31e583aSLars Ellenberg peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
1604dd4f699dSLars Ellenberg peer_req->flags |= EE_WAS_ERROR;
1605dd4f699dSLars Ellenberg drbd_endio_write_sec_final(peer_req);
1606dd4f699dSLars Ellenberg }
1607dd4f699dSLars Ellenberg
peer_request_fault_type(struct drbd_peer_request * peer_req)1608ce668b6dSChristoph Böhmwalder static int peer_request_fault_type(struct drbd_peer_request *peer_req)
1609ce668b6dSChristoph Böhmwalder {
1610ce668b6dSChristoph Böhmwalder if (peer_req_op(peer_req) == REQ_OP_READ) {
1611ce668b6dSChristoph Böhmwalder return peer_req->flags & EE_APPLICATION ?
1612ce668b6dSChristoph Böhmwalder DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
1613ce668b6dSChristoph Böhmwalder } else {
1614ce668b6dSChristoph Böhmwalder return peer_req->flags & EE_APPLICATION ?
1615ce668b6dSChristoph Böhmwalder DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
1616ce668b6dSChristoph Böhmwalder }
1617ce668b6dSChristoph Böhmwalder }
1618ce668b6dSChristoph Böhmwalder
1619a34592ffSChristoph Hellwig /**
1620fbe29decSAndreas Gruenbacher * drbd_submit_peer_request()
1621db830c46SAndreas Gruenbacher * @peer_req: peer request
162210f6d992SLars Ellenberg *
162310f6d992SLars Ellenberg * May spread the pages to multiple bios,
162410f6d992SLars Ellenberg * depending on bio_add_page restrictions.
162510f6d992SLars Ellenberg *
162610f6d992SLars Ellenberg * Returns 0 if all bios have been submitted,
162710f6d992SLars Ellenberg * -ENOMEM if we could not allocate enough bios,
162810f6d992SLars Ellenberg * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
162910f6d992SLars Ellenberg * single page to an empty bio (which should never happen and likely indicates
163010f6d992SLars Ellenberg * that the lower level IO stack is in some way broken). This has been observed
163110f6d992SLars Ellenberg * on certain Xen deployments.
163245bb912bSLars Ellenberg */
163345bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */
drbd_submit_peer_request(struct drbd_peer_request * peer_req)1634ce668b6dSChristoph Böhmwalder int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
163545bb912bSLars Ellenberg {
1636ce668b6dSChristoph Böhmwalder struct drbd_device *device = peer_req->peer_device->device;
163745bb912bSLars Ellenberg struct bio *bios = NULL;
163845bb912bSLars Ellenberg struct bio *bio;
1639db830c46SAndreas Gruenbacher struct page *page = peer_req->pages;
1640db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector;
1641e6be38a1SCai Huoqing unsigned int data_size = peer_req->i.size;
1642e6be38a1SCai Huoqing unsigned int n_bios = 0;
1643e6be38a1SCai Huoqing unsigned int nr_pages = PFN_UP(data_size);
164445bb912bSLars Ellenberg
1645dd4f699dSLars Ellenberg /* TRIM/DISCARD: for now, always use the helper function
1646dd4f699dSLars Ellenberg * blkdev_issue_zeroout(..., discard=true).
1647dd4f699dSLars Ellenberg * It's synchronous, but it does the right thing wrt. bio splitting.
1648dd4f699dSLars Ellenberg * Correctness first, performance later. Next step is to code an
1649dd4f699dSLars Ellenberg * asynchronous variant of the same.
1650dd4f699dSLars Ellenberg */
1651a34592ffSChristoph Hellwig if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
1652a0fb3c47SLars Ellenberg /* wait for all pending IO completions, before we start
1653a0fb3c47SLars Ellenberg * zeroing things out. */
16545dd2ca19SAndreas Gruenbacher conn_wait_active_ee_empty(peer_req->peer_device->connection);
165545d2933cSLars Ellenberg /* add it to the active list now,
165645d2933cSLars Ellenberg * so we can find it to present it in debugfs */
165721ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies;
165821ae5d7fSLars Ellenberg peer_req->flags |= EE_SUBMITTED;
1659700ca8c0SPhilipp Reisner
1660700ca8c0SPhilipp Reisner /* If this was a resync request from receive_rs_deallocated(),
1661700ca8c0SPhilipp Reisner * it is already on the sync_ee list */
1662700ca8c0SPhilipp Reisner if (list_empty(&peer_req->w.list)) {
166345d2933cSLars Ellenberg spin_lock_irq(&device->resource->req_lock);
166445d2933cSLars Ellenberg list_add_tail(&peer_req->w.list, &device->active_ee);
166545d2933cSLars Ellenberg spin_unlock_irq(&device->resource->req_lock);
1666700ca8c0SPhilipp Reisner }
1667700ca8c0SPhilipp Reisner
1668f31e583aSLars Ellenberg drbd_issue_peer_discard_or_zero_out(device, peer_req);
1669a0fb3c47SLars Ellenberg return 0;
1670a0fb3c47SLars Ellenberg }
1671a0fb3c47SLars Ellenberg
167245bb912bSLars Ellenberg /* In most cases, we will only need one bio. But in case the lower
167345bb912bSLars Ellenberg * level restrictions happen to be different at this offset on this
167445bb912bSLars Ellenberg * side than those of the sending peer, we may need to submit the
16759476f39dSLars Ellenberg * request in more than one bio.
16769476f39dSLars Ellenberg *
16779476f39dSLars Ellenberg * Plain bio_alloc is good enough here, this is no DRBD internally
16789476f39dSLars Ellenberg * generated bio, but a bio allocated on behalf of the peer.
16799476f39dSLars Ellenberg */
168045bb912bSLars Ellenberg next_bio:
1681ce668b6dSChristoph Böhmwalder /* _DISCARD, _WRITE_ZEROES handled above.
1682ce668b6dSChristoph Böhmwalder * REQ_OP_FLUSH (empty flush) not expected,
1683ce668b6dSChristoph Böhmwalder * should have been mapped to a "drbd protocol barrier".
1684ce668b6dSChristoph Böhmwalder * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
1685ce668b6dSChristoph Böhmwalder */
1686ce668b6dSChristoph Böhmwalder if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
1687ce668b6dSChristoph Böhmwalder peer_req_op(peer_req) == REQ_OP_READ)) {
1688ce668b6dSChristoph Böhmwalder drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
1689ce668b6dSChristoph Böhmwalder return -EINVAL;
1690ce668b6dSChristoph Böhmwalder }
1691ce668b6dSChristoph Böhmwalder
1692ce668b6dSChristoph Böhmwalder bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
1693db830c46SAndreas Gruenbacher /* > peer_req->i.sector, unless this is the first bio */
16944f024f37SKent Overstreet bio->bi_iter.bi_sector = sector;
1695db830c46SAndreas Gruenbacher bio->bi_private = peer_req;
1696fcefa62eSAndreas Gruenbacher bio->bi_end_io = drbd_peer_request_endio;
169745bb912bSLars Ellenberg
169845bb912bSLars Ellenberg bio->bi_next = bios;
169945bb912bSLars Ellenberg bios = bio;
170045bb912bSLars Ellenberg ++n_bios;
170145bb912bSLars Ellenberg
170245bb912bSLars Ellenberg page_chain_for_each(page) {
170311f8b2b6SAndreas Gruenbacher unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
170406efffdaSMing Lei if (!bio_add_page(bio, page, len, 0))
170545bb912bSLars Ellenberg goto next_bio;
170611f8b2b6SAndreas Gruenbacher data_size -= len;
170745bb912bSLars Ellenberg sector += len >> 9;
170845bb912bSLars Ellenberg --nr_pages;
170945bb912bSLars Ellenberg }
171011f8b2b6SAndreas Gruenbacher D_ASSERT(device, data_size == 0);
1711a0fb3c47SLars Ellenberg D_ASSERT(device, page == NULL);
171245bb912bSLars Ellenberg
1713db830c46SAndreas Gruenbacher atomic_set(&peer_req->pending_bios, n_bios);
171421ae5d7fSLars Ellenberg /* for debugfs: update timestamp, mark as submitted */
171521ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies;
171621ae5d7fSLars Ellenberg peer_req->flags |= EE_SUBMITTED;
171745bb912bSLars Ellenberg do {
171845bb912bSLars Ellenberg bio = bios;
171945bb912bSLars Ellenberg bios = bios->bi_next;
172045bb912bSLars Ellenberg bio->bi_next = NULL;
172145bb912bSLars Ellenberg
1722ce668b6dSChristoph Böhmwalder drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
172345bb912bSLars Ellenberg } while (bios);
172445bb912bSLars Ellenberg return 0;
172545bb912bSLars Ellenberg }
172645bb912bSLars Ellenberg
drbd_remove_epoch_entry_interval(struct drbd_device * device,struct drbd_peer_request * peer_req)1727b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
1728db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req)
172953840641SAndreas Gruenbacher {
1730db830c46SAndreas Gruenbacher struct drbd_interval *i = &peer_req->i;
173153840641SAndreas Gruenbacher
1732b30ab791SAndreas Gruenbacher drbd_remove_interval(&device->write_requests, i);
173353840641SAndreas Gruenbacher drbd_clear_interval(i);
173453840641SAndreas Gruenbacher
17356c852becSAndreas Gruenbacher /* Wake up any processes waiting for this peer request to complete. */
173653840641SAndreas Gruenbacher if (i->waiting)
1737b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait);
173853840641SAndreas Gruenbacher }
173953840641SAndreas Gruenbacher
/* Wait until the active_ee list of every volume (peer device) on this
 * connection has drained.  drbd_wait_ee_list_empty() may sleep, so the RCU
 * read lock is dropped around it; a kref pin keeps the device alive while
 * we are outside the RCU critical section. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* pin the device so it cannot go away while we sleep */
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
175777fede51SPhilipp Reisner
/* Handle a P_BARRIER packet: stamp the current epoch with the peer's
 * barrier number and try to finish it.  Depending on the configured write
 * ordering we either allocate a fresh epoch object up front (WO_NONE) or
 * first drain and flush outstanding writes (WO_BDEV_FLUSH / WO_DRAIN_IO)
 * before deciding whether a new epoch is needed at all. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		/* FE_RECYCLED: the epoch was empty and reused in place,
		 * nothing further to do. */
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* allocation failed: degrade to the drain/flush behavior below */
		fallthrough;

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* only allocate a new epoch if the (now flushed) current one
		 * still holds requests and thus cannot be recycled */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1825b411b363SPhilipp Reisner
18269104d31aSLars Ellenberg /* quick wrapper in case payload size != request_size (write same) */
drbd_csum_ee_size(struct crypto_shash * h,struct drbd_peer_request * r,void * d,unsigned int payload_size)18273d0e6375SKees Cook static void drbd_csum_ee_size(struct crypto_shash *h,
18289104d31aSLars Ellenberg struct drbd_peer_request *r, void *d,
18299104d31aSLars Ellenberg unsigned int payload_size)
18309104d31aSLars Ellenberg {
18319104d31aSLars Ellenberg unsigned int tmp = r->i.size;
18329104d31aSLars Ellenberg r->i.size = payload_size;
18339104d31aSLars Ellenberg drbd_csum_ee(h, r, d);
18349104d31aSLars Ellenberg r->i.size = tmp;
18359104d31aSLars Ellenberg }
18369104d31aSLars Ellenberg
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * data_size: actual payload ("data in")
 *    for normal writes that is bi_size.
 *    for discards (P_TRIM) and zero-out (P_ZEROES), that is zero;
 *    the affected size travels in the packet header instead.
 * Returns a newly allocated peer request with the payload received into its
 * page chain, or NULL on any protocol/validation/allocation error.
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = get_capacity(device->vdisk);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;

	/* If data integrity is configured, the payload is preceded by a
	 * digest; receive it first and account for it in data_size. */
	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim. */
	ds = data_size;
	if (trim) {
		if (!expect(peer_device, data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (zeroes) {
		if (!expect(peer_device, data_size == 0))
			return NULL;
		ds = be32_to_cpu(zeroes->size);
	}

	/* validate the affected size: sector aligned and within limits */
	if (!expect(peer_device, IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || zeroes) {
		if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	/* trim/zeroout carry no payload: flag the request and return early */
	if (trim) {
		peer_req->flags |= EE_TRIM;
		return peer_req;
	}
	if (zeroes) {
		peer_req->flags |= EE_ZEROOUT;
		return peer_req;
	}

	/* receive payload size bytes into page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		/* optional fault injection: flip the first word of the page */
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the digest sent by the peer */
	if (digest_size) {
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1953b411b363SPhilipp Reisner
/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 * A single scratch page is reused for every PAGE_SIZE chunk.
 * Returns 0 on success or the receive error code.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	/* (peer_device, nr=1, retry=1): waits until a page is available */
	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		/* each chunk overwrites the same scratch page */
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}
1981b411b363SPhilipp Reisner
/* Receive a "diskless read" reply directly into the master bio of the
 * original request @req: an optional integrity digest first, then the
 * payload segment by segment, finally the digest verification.
 * Returns 0 on success or a negative error code. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* with data integrity enabled, a digest precedes the payload */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill each bio segment straight from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = bvec_kmap_local(&bvec);
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap_local(mapped);
		if (err)
			return err;
		data_size -= expect;
	}

	/* recompute the digest over the received bio and compare */
	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
2029b411b363SPhilipp Reisner
/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 * Completion callback for a resync write: on success mark the range in
 * sync and send P_RS_WRITE_ACK, on error record the failed range and send
 * P_NEG_ACK.  Returns the result of drbd_send_ack().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(peer_device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(peer_device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* pairs with inc_unacked() in recv_resync_read() */
	dec_unacked(device);

	return err;
}
2058b411b363SPhilipp Reisner
/* Receive one resync data block and submit it as a local write.
 * The ldev reference taken by the caller is released here on the error
 * path, or later by the bio completion (drbd_peer_request_endio).
 * Returns 0 if the request was submitted, -EIO otherwise. */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(peer_device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->opf = REQ_OP_WRITE;
	peer_req->submit_jif = jiffies;

	/* track the request on sync_ee so it can be found/cleaned up */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* account resync sectors for the resync rate controller */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
2098b411b363SPhilipp Reisner
2099668eebc6SAndreas Gruenbacher static struct drbd_request *
find_request(struct drbd_device * device,struct rb_root * root,u64 id,sector_t sector,bool missing_ok,const char * func)2100b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id,
2101bc9c5c41SAndreas Gruenbacher sector_t sector, bool missing_ok, const char *func)
2102b411b363SPhilipp Reisner {
2103b411b363SPhilipp Reisner struct drbd_request *req;
2104668eebc6SAndreas Gruenbacher
2105bc9c5c41SAndreas Gruenbacher /* Request object according to our peer */
2106bc9c5c41SAndreas Gruenbacher req = (struct drbd_request *)(unsigned long)id;
21075e472264SAndreas Gruenbacher if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
2108668eebc6SAndreas Gruenbacher return req;
2109c3afd8f5SAndreas Gruenbacher if (!missing_ok) {
2110d0180171SAndreas Gruenbacher drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
2111c3afd8f5SAndreas Gruenbacher (unsigned long)id, (unsigned long long)sector);
2112c3afd8f5SAndreas Gruenbacher }
2113668eebc6SAndreas Gruenbacher return NULL;
2114668eebc6SAndreas Gruenbacher }
2115668eebc6SAndreas Gruenbacher
/* Handle a P_DATA_REPLY packet: locate the originating read request via
 * its block_id, receive the payload into its master bio, and advance the
 * request state machine with DATA_RECEIVED on success. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	/* look up the request under the req_lock */
	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED, peer_device);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
2147b411b363SPhilipp Reisner
/* Handle a P_RS_DATA_REPLY packet: if we still have a local disk, write
 * the received resync block via recv_resync_read(); otherwise drain the
 * payload from the socket and negatively acknowledge it. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies always carry the ID_SYNCER block id */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (drbd_ratelimit())
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account received resync sectors for the rate controller */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
2182b411b363SPhilipp Reisner
/* Re-queue local write requests that were postponed because they conflicted
 * with the peer write covering [sector, sector+size).  Caller holds the
 * req_lock (all users of device->write_requests do). */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		/* only intervals backed by local requests are of interest */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* skip requests still in flight locally, and those that
		 * were not postponed by conflict resolution */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
	}
}
22017be8da07SAndreas Gruenbacher
/*
 * Completion callback for a mirrored write: send the appropriate ack,
 * retire the request from conflict detection, and drop its epoch
 * reference.
 *
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 * @cancel is set when the request is cleaned up rather than completed
 * normally (EV_CLEANUP is then passed to drbd_may_finish_epoch()).
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While in a resync state (SyncSource..PausedSyncT),
			 * a successful write eligible via EE_MAY_SET_IN_SYNC
			 * is acked with P_RS_WRITE_ACK and marked in sync. */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(peer_device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		/* pairs with inc_unacked() in receive_Data() */
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
2247b411b363SPhilipp Reisner
e_send_ack(struct drbd_work * w,enum drbd_packet ack)2248a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2249b411b363SPhilipp Reisner {
22508050e6d0SAndreas Gruenbacher struct drbd_peer_request *peer_req =
2251a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w);
2252a8cd15baSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device;
225399920dc5SAndreas Gruenbacher int err;
2254b411b363SPhilipp Reisner
2255a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, ack, peer_req);
2256a8cd15baSAndreas Gruenbacher dec_unacked(peer_device->device);
2257b411b363SPhilipp Reisner
225899920dc5SAndreas Gruenbacher return err;
2259b411b363SPhilipp Reisner }
2260b411b363SPhilipp Reisner
e_send_superseded(struct drbd_work * w,int unused)2261d4dabbe2SLars Ellenberg static int e_send_superseded(struct drbd_work *w, int unused)
2262b6a370baSPhilipp Reisner {
2263a8cd15baSAndreas Gruenbacher return e_send_ack(w, P_SUPERSEDED);
22647be8da07SAndreas Gruenbacher }
2265b6a370baSPhilipp Reisner
e_send_retry_write(struct drbd_work * w,int unused)226699920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused)
22677be8da07SAndreas Gruenbacher {
2268a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req =
2269a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w);
2270a8cd15baSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection;
22717be8da07SAndreas Gruenbacher
2272a8cd15baSAndreas Gruenbacher return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2273d4dabbe2SLars Ellenberg P_RETRY_WRITE : P_SUPERSEDED);
22747be8da07SAndreas Gruenbacher }
22757be8da07SAndreas Gruenbacher
seq_greater(u32 a,u32 b)22763e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b)
22773e394da1SAndreas Gruenbacher {
22783e394da1SAndreas Gruenbacher /*
22793e394da1SAndreas Gruenbacher * We assume 32-bit wrap-around here.
22803e394da1SAndreas Gruenbacher * For 24-bit wrap-around, we would have to shift:
22813e394da1SAndreas Gruenbacher * a <<= 8; b <<= 8;
22823e394da1SAndreas Gruenbacher */
22833e394da1SAndreas Gruenbacher return (s32)a - (s32)b > 0;
22843e394da1SAndreas Gruenbacher }
22853e394da1SAndreas Gruenbacher
/* Return the newer of two wrap-around sequence numbers. */
static u32 seq_max(u32 a, u32 b)
{
	if (seq_greater(a, b))
		return a;
	return b;
}
22903e394da1SAndreas Gruenbacher
update_peer_seq(struct drbd_peer_device * peer_device,unsigned int peer_seq)229169a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
22923e394da1SAndreas Gruenbacher {
229369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device;
22943c13b680SLars Ellenberg unsigned int newest_peer_seq;
22953e394da1SAndreas Gruenbacher
229669a22773SAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2297b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock);
2298b30ab791SAndreas Gruenbacher newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2299b30ab791SAndreas Gruenbacher device->peer_seq = newest_peer_seq;
2300b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock);
2301b30ab791SAndreas Gruenbacher /* wake up only if we actually changed device->peer_seq */
23023c13b680SLars Ellenberg if (peer_seq == newest_peer_seq)
2303b30ab791SAndreas Gruenbacher wake_up(&device->seq_wait);
23043e394da1SAndreas Gruenbacher }
23057be8da07SAndreas Gruenbacher }
23063e394da1SAndreas Gruenbacher
/*
 * Do the two sector ranges [s1, s1 + l1>>9) and [s2, s2 + l2>>9)
 * intersect?  Lengths l1/l2 are in bytes, hence the >> 9.
 */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	sector_t e1 = s1 + (l1 >> 9);
	sector_t e2 = s2 + (l2 >> 9);

	/* They overlap iff each range starts before the other one ends. */
	return s1 < e2 && s2 < e1;
}
2311d93f6302SLars Ellenberg
2312d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? */
overlapping_resync_write(struct drbd_device * device,struct drbd_peer_request * peer_req)2313b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2314d93f6302SLars Ellenberg {
2315d93f6302SLars Ellenberg struct drbd_peer_request *rs_req;
23167e5fec31SFabian Frederick bool rv = false;
2317b6a370baSPhilipp Reisner
23180500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock);
2319a8cd15baSAndreas Gruenbacher list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2320d93f6302SLars Ellenberg if (overlaps(peer_req->i.sector, peer_req->i.size,
2321d93f6302SLars Ellenberg rs_req->i.sector, rs_req->i.size)) {
23227e5fec31SFabian Frederick rv = true;
2323b6a370baSPhilipp Reisner break;
2324b6a370baSPhilipp Reisner }
2325b6a370baSPhilipp Reisner }
23260500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock);
2327b6a370baSPhilipp Reisner
2328b6a370baSPhilipp Reisner return rv;
2329b6a370baSPhilipp Reisner }
2330b6a370baSPhilipp Reisner
2331b411b363SPhilipp Reisner /* Called from receive_Data.
2332b411b363SPhilipp Reisner * Synchronize packets on sock with packets on msock.
2333b411b363SPhilipp Reisner *
2334b411b363SPhilipp Reisner * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2335b411b363SPhilipp Reisner * packet traveling on msock, they are still processed in the order they have
2336b411b363SPhilipp Reisner * been sent.
2337b411b363SPhilipp Reisner *
2338b411b363SPhilipp Reisner * Note: we don't care for Ack packets overtaking P_DATA packets.
2339b411b363SPhilipp Reisner *
2340b30ab791SAndreas Gruenbacher * In case packet_seq is larger than device->peer_seq number, there are
2341b411b363SPhilipp Reisner * outstanding packets on the msock. We wait for them to arrive.
2342b30ab791SAndreas Gruenbacher * In case we are the logically next packet, we update device->peer_seq
2343b411b363SPhilipp Reisner * ourselves. Correctly handles 32bit wrap around.
2344b411b363SPhilipp Reisner *
2345b411b363SPhilipp Reisner * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2346b411b363SPhilipp Reisner * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2347b411b363SPhilipp Reisner * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2348b411b363SPhilipp Reisner * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2349b411b363SPhilipp Reisner *
2350b411b363SPhilipp Reisner * returns 0 if we may process the packet,
2351b411b363SPhilipp Reisner * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequencing across sock/msock only matters when this node has
	 * to resolve write conflicts. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* We are the logically next packet (or an older,
		 * already-seen one): record the max and proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* Sleep until update_peer_seq() advances peer_seq and wakes
		 * us, or the ping timeout expires. */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2399b411b363SPhilipp Reisner
/*
 * Map DRBD wire data-packet flags (DP_*) to the block layer request
 * operation for the write.  DP_ZEROES takes precedence over
 * DP_DISCARD; a plain write is the default.
 */
static enum req_op wire_flags_to_bio_op(u32 dpf)
{
	/* Consistent early-return chain; the former stray "else" after a
	 * returning "if" was redundant. */
	if (dpf & DP_ZEROES)
		return REQ_OP_WRITE_ZEROES;
	if (dpf & DP_DISCARD)
		return REQ_OP_DISCARD;
	return REQ_OP_WRITE;
}
240976d2e7ecSPhilipp Reisner
2410ce668b6dSChristoph Böhmwalder /* see also bio_flags_to_wire() */
wire_flags_to_bio(struct drbd_connection * connection,u32 dpf)2411ce668b6dSChristoph Böhmwalder static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
2412ce668b6dSChristoph Böhmwalder {
2413ce668b6dSChristoph Böhmwalder return wire_flags_to_bio_op(dpf) |
2414ce668b6dSChristoph Böhmwalder (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2415ce668b6dSChristoph Böhmwalder (dpf & DP_FUA ? REQ_FUA : 0) |
2416ce668b6dSChristoph Böhmwalder (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2417ce668b6dSChristoph Böhmwalder }
2418ce668b6dSChristoph Böhmwalder
/*
 * Fail all postponed local writes overlapping [sector, sector + size).
 *
 * Called with device->resource->req_lock held (from
 * handle_write_conflicts() after conflict resolution failed).  Each
 * RQ_POSTPONED request is completed with NEG_ACKED instead of being
 * retried.  Completing the master bio requires dropping the req_lock,
 * after which the interval tree may have changed, so the overlap scan
 * restarts from scratch every iteration.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, peer_device, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		/* tree may have changed while the lock was dropped */
		goto repeat;
	}
}
24447be8da07SAndreas Gruenbacher
/*
 * Resolve conflicts between an incoming peer write and overlapping
 * requests in the write_requests interval tree.
 *
 * Called with device->resource->req_lock held.  Inserts peer_req->i
 * into the tree, then for each overlapping interval either waits for
 * it, supersedes the peer request, or schedules a retry, depending on
 * whether this node holds the RESOLVE_CONFLICTS role.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it was
 * superseded/retried (the corresponding ack has already been queued on
 * done_ee and the caller must not submit it), or another negative
 * error if waiting was interrupted.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;
		if (i->completed)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				(i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Queue the appropriate ack on done_ee; the
			 * ack_sender will deliver it. */
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					/* Interrupted: force-disconnect and
					 * fail all postponed overlapping
					 * requests. */
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

 out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
25527be8da07SAndreas Gruenbacher
/*
 * Receive a mirrored write (P_DATA / P_TRIM / P_ZEROES) from the peer.
 *
 * Reads the payload, assigns the request to the current epoch, sends
 * or arms the protocol-appropriate ack, performs write-conflict
 * handling (two-primaries), and submits the request to the local disk.
 * Returns 0 on success, a negative error code otherwise.
 */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		/* No local disk: still keep peer_seq and epoch accounting
		 * consistent, send a negative ack, and drain the payload. */
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	peer_req->opf = wire_flags_to_bio(connection, dp_flags);
	if (pi->cmd == P_TRIM) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* need to play safe: an older DRBD sender
		 * may mean zero-out while sending P_TRIM. */
		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
			peer_req->flags |= EE_ZEROOUT;
	} else if (pi->cmd == P_ZEROES) {
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
		D_ASSERT(peer_device, peer_req->pages == NULL);
		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
		if (dp_flags & DP_DISCARD)
			peer_req->flags |= EE_TRIM;
	} else if (peer_req->pages == NULL) {
		/* No payload pages: must be an empty flush request. */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Account this write in the current epoch. */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		/* Older peers do not send the ack-mode flags; derive them
		 * from the configured wire protocol. */
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				/* Superseded/retried: the ack was already
				 * queued by handle_write_conflicts(). */
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* TRIM and ZEROES requests are processed synchronously:
	 * we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(peer_req);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2720b411b363SPhilipp Reisner
27210f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy,
27220f0601f4SLars Ellenberg * and current sync rate is above c_min_rate.
27230f0601f4SLars Ellenberg *
27240f0601f4SLars Ellenberg * To decide whether or not the lower device is busy, we use a scheme similar
27250f0601f4SLars Ellenberg * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
27260f0601f4SLars Ellenberg * (more than 64 sectors) of activity we cannot account for with our own resync
27270f0601f4SLars Ellenberg * activity, it obviously is "busy".
27280f0601f4SLars Ellenberg *
27290f0601f4SLars Ellenberg * The current sync rate used here uses only the most recent two step marks,
27300f0601f4SLars Ellenberg * to have a short time average so we can react faster.
27310f0601f4SLars Ellenberg */
drbd_rs_should_slow_down(struct drbd_peer_device * peer_device,sector_t sector,bool throttle_if_app_is_waiting)27320d11f3cfSChristoph Böhmwalder bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
2733ad3fee79SLars Ellenberg bool throttle_if_app_is_waiting)
2734e8299874SLars Ellenberg {
27350d11f3cfSChristoph Böhmwalder struct drbd_device *device = peer_device->device;
2736e8299874SLars Ellenberg struct lc_element *tmp;
2737ad3fee79SLars Ellenberg bool throttle = drbd_rs_c_min_rate_throttle(device);
2738e8299874SLars Ellenberg
2739ad3fee79SLars Ellenberg if (!throttle || throttle_if_app_is_waiting)
2740ad3fee79SLars Ellenberg return throttle;
2741e8299874SLars Ellenberg
2742e8299874SLars Ellenberg spin_lock_irq(&device->al_lock);
2743e8299874SLars Ellenberg tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2744e8299874SLars Ellenberg if (tmp) {
2745e8299874SLars Ellenberg struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2746e8299874SLars Ellenberg if (test_bit(BME_PRIORITY, &bm_ext->flags))
2747e8299874SLars Ellenberg throttle = false;
2748ad3fee79SLars Ellenberg /* Do not slow down if app IO is already waiting for this extent,
2749ad3fee79SLars Ellenberg * and our progress is necessary for application IO to complete. */
2750e8299874SLars Ellenberg }
2751e8299874SLars Ellenberg spin_unlock_irq(&device->al_lock);
2752e8299874SLars Ellenberg
2753e8299874SLars Ellenberg return throttle;
2754e8299874SLars Ellenberg }
2755e8299874SLars Ellenberg
drbd_rs_c_min_rate_throttle(struct drbd_device * device)2756e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
27570f0601f4SLars Ellenberg {
27588c40c7c4SChristoph Hellwig struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
27590f0601f4SLars Ellenberg unsigned long db, dt, dbdt;
2760daeda1ccSPhilipp Reisner unsigned int c_min_rate;
2761e8299874SLars Ellenberg int curr_events;
2762daeda1ccSPhilipp Reisner
2763daeda1ccSPhilipp Reisner rcu_read_lock();
2764b30ab791SAndreas Gruenbacher c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2765daeda1ccSPhilipp Reisner rcu_read_unlock();
27660f0601f4SLars Ellenberg
27670f0601f4SLars Ellenberg /* feature disabled? */
2768daeda1ccSPhilipp Reisner if (c_min_rate == 0)
2769e8299874SLars Ellenberg return false;
2770e3555d85SPhilipp Reisner
27718446fe92SChristoph Hellwig curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
2772b30ab791SAndreas Gruenbacher atomic_read(&device->rs_sect_ev);
2773ad3fee79SLars Ellenberg
2774ad3fee79SLars Ellenberg if (atomic_read(&device->ap_actlog_cnt)
2775ff8bd88bSLars Ellenberg || curr_events - device->rs_last_events > 64) {
27760f0601f4SLars Ellenberg unsigned long rs_left;
27770f0601f4SLars Ellenberg int i;
27780f0601f4SLars Ellenberg
2779b30ab791SAndreas Gruenbacher device->rs_last_events = curr_events;
27800f0601f4SLars Ellenberg
27810f0601f4SLars Ellenberg /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
27820f0601f4SLars Ellenberg * approx. */
2783b30ab791SAndreas Gruenbacher i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
27842649f080SLars Ellenberg
2785b30ab791SAndreas Gruenbacher if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2786b30ab791SAndreas Gruenbacher rs_left = device->ov_left;
27872649f080SLars Ellenberg else
2788b30ab791SAndreas Gruenbacher rs_left = drbd_bm_total_weight(device) - device->rs_failed;
27890f0601f4SLars Ellenberg
2790b30ab791SAndreas Gruenbacher dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
27910f0601f4SLars Ellenberg if (!dt)
27920f0601f4SLars Ellenberg dt++;
2793b30ab791SAndreas Gruenbacher db = device->rs_mark_left[i] - rs_left;
27940f0601f4SLars Ellenberg dbdt = Bit2KB(db/dt);
27950f0601f4SLars Ellenberg
2796daeda1ccSPhilipp Reisner if (dbdt > c_min_rate)
2797e8299874SLars Ellenberg return true;
27980f0601f4SLars Ellenberg }
2799e8299874SLars Ellenberg return false;
28000f0601f4SLars Ellenberg }
28010f0601f4SLars Ellenberg
/*
 * receive_DataRequest() - handle a read-type request from the peer
 *
 * Services P_DATA_REQUEST (application read on our behalf),
 * P_RS_THIN_REQ / P_RS_DATA_REQUEST (resync reads),
 * P_CSUM_RS_REQUEST / P_OV_REPLY (requests carrying a digest payload),
 * and P_OV_REQUEST (online verify).
 *
 * Returns 0 on success; a negative error causes the receiver to tear
 * down the connection ("triggering re-connect").
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = get_capacity(device->vdisk);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* Sanity-check the request: positive, 512-byte aligned, bounded size... */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* ...and within the device capacity. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: negatively acknowledge, then drain any
	 * payload still in the stream so the protocol stays in sync. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_THIN_REQ:
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(peer_device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && drbd_ratelimit())
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}
	peer_req->opf = REQ_OP_READ;

	/* Pick the completion callback and per-type bookkeeping. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_THIN_REQ:
		/* If at some point in the future we have a smart way to
		   find out if this data block is completely deallocated,
		   then we would do something smarter here than reading
		   the block... */
		peer_req->flags |= EE_RS_THIN_REQ;
		fallthrough;
	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a digest payload; receive it into a
		 * digest_info allocated with trailing storage. */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		/* EE_HAS_DIGEST makes drbd_free_peer_req free di with the request. */
		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(peer_device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First verify request with an agreed protocol >= 90:
		 * initialize the online-verify bookkeeping and rate marks. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	    && drbd_rs_should_slow_down(peer_device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account submitted resync sectors; drbd_rs_c_min_rate_throttle()
	 * subtracts these from the disk's overall activity */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
3009b411b363SPhilipp Reisner
/*
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.
 * Return convention (matching the sync-decision table used by
 * drbd_uuid_compare()): 1 = keep local data / discard peer,
 * -1 = discard local data / take peer's, -100 = no automatic resolution.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* lowest bit of the bitmap UUIDs; used by the younger/older
	 * primary policies below.
	 * NOTE(review): presumably this flags "was primary when the
	 * bitmap UUID was created" -- confirm against drbd_uuid_set_bm(). */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* changed-block counts used by the least-changes policies.
	 * NOTE(review): p_uuid[UI_SIZE] appears to carry the peer's count
	 * of set bitmap bits -- confirm against the sender side. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* These policies only make sense when a primary remains. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		fallthrough;	/* to one of the other strategies */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		fallthrough;
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Both unchanged: break the tie via the
			 * RESOLVE_CONFLICTS flag (initiator of the connection). */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		fallthrough;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
3092b411b363SPhilipp Reisner
30939b48ff07SLee Jones /*
309469a22773SAndreas Gruenbacher * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
309569a22773SAndreas Gruenbacher */
drbd_asb_recover_1p(struct drbd_peer_device * peer_device)309669a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3097b411b363SPhilipp Reisner {
309869a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device;
30996184ea21SAndreas Gruenbacher int hg, rv = -100;
310044ed167dSPhilipp Reisner enum drbd_after_sb_p after_sb_1p;
3101b411b363SPhilipp Reisner
310244ed167dSPhilipp Reisner rcu_read_lock();
310369a22773SAndreas Gruenbacher after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
310444ed167dSPhilipp Reisner rcu_read_unlock();
310544ed167dSPhilipp Reisner switch (after_sb_1p) {
3106b411b363SPhilipp Reisner case ASB_DISCARD_YOUNGER_PRI:
3107b411b363SPhilipp Reisner case ASB_DISCARD_OLDER_PRI:
3108b411b363SPhilipp Reisner case ASB_DISCARD_LEAST_CHG:
3109b411b363SPhilipp Reisner case ASB_DISCARD_LOCAL:
3110b411b363SPhilipp Reisner case ASB_DISCARD_REMOTE:
311144ed167dSPhilipp Reisner case ASB_DISCARD_ZERO_CHG:
3112d0180171SAndreas Gruenbacher drbd_err(device, "Configuration error.\n");
3113b411b363SPhilipp Reisner break;
3114b411b363SPhilipp Reisner case ASB_DISCONNECT:
3115b411b363SPhilipp Reisner break;
3116b411b363SPhilipp Reisner case ASB_CONSENSUS:
311769a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device);
3118b30ab791SAndreas Gruenbacher if (hg == -1 && device->state.role == R_SECONDARY)
3119b411b363SPhilipp Reisner rv = hg;
3120b30ab791SAndreas Gruenbacher if (hg == 1 && device->state.role == R_PRIMARY)
3121b411b363SPhilipp Reisner rv = hg;
3122b411b363SPhilipp Reisner break;
3123b411b363SPhilipp Reisner case ASB_VIOLENTLY:
312469a22773SAndreas Gruenbacher rv = drbd_asb_recover_0p(peer_device);
3125b411b363SPhilipp Reisner break;
3126b411b363SPhilipp Reisner case ASB_DISCARD_SECONDARY:
3127b30ab791SAndreas Gruenbacher return device->state.role == R_PRIMARY ? 1 : -1;
3128b411b363SPhilipp Reisner case ASB_CALL_HELPER:
312969a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device);
3130b30ab791SAndreas Gruenbacher if (hg == -1 && device->state.role == R_PRIMARY) {
3131bb437946SAndreas Gruenbacher enum drbd_state_rv rv2;
3132bb437946SAndreas Gruenbacher
3133b411b363SPhilipp Reisner /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3134b411b363SPhilipp Reisner * we might be here in C_WF_REPORT_PARAMS which is transient.
3135b411b363SPhilipp Reisner * we do not need to wait for the after state change work either. */
3136b30ab791SAndreas Gruenbacher rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3137bb437946SAndreas Gruenbacher if (rv2 != SS_SUCCESS) {
3138b30ab791SAndreas Gruenbacher drbd_khelper(device, "pri-lost-after-sb");
3139b411b363SPhilipp Reisner } else {
3140d0180171SAndreas Gruenbacher drbd_warn(device, "Successfully gave up primary role.\n");
3141b411b363SPhilipp Reisner rv = hg;
3142b411b363SPhilipp Reisner }
3143b411b363SPhilipp Reisner } else
3144b411b363SPhilipp Reisner rv = hg;
3145b411b363SPhilipp Reisner }
3146b411b363SPhilipp Reisner
3147b411b363SPhilipp Reisner return rv;
3148b411b363SPhilipp Reisner }
3149b411b363SPhilipp Reisner
31509b48ff07SLee Jones /*
315169a22773SAndreas Gruenbacher * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
315269a22773SAndreas Gruenbacher */
drbd_asb_recover_2p(struct drbd_peer_device * peer_device)315369a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3154b411b363SPhilipp Reisner {
315569a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device;
31566184ea21SAndreas Gruenbacher int hg, rv = -100;
315744ed167dSPhilipp Reisner enum drbd_after_sb_p after_sb_2p;
3158b411b363SPhilipp Reisner
315944ed167dSPhilipp Reisner rcu_read_lock();
316069a22773SAndreas Gruenbacher after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
316144ed167dSPhilipp Reisner rcu_read_unlock();
316244ed167dSPhilipp Reisner switch (after_sb_2p) {
3163b411b363SPhilipp Reisner case ASB_DISCARD_YOUNGER_PRI:
3164b411b363SPhilipp Reisner case ASB_DISCARD_OLDER_PRI:
3165b411b363SPhilipp Reisner case ASB_DISCARD_LEAST_CHG:
3166b411b363SPhilipp Reisner case ASB_DISCARD_LOCAL:
3167b411b363SPhilipp Reisner case ASB_DISCARD_REMOTE:
3168b411b363SPhilipp Reisner case ASB_CONSENSUS:
3169b411b363SPhilipp Reisner case ASB_DISCARD_SECONDARY:
317044ed167dSPhilipp Reisner case ASB_DISCARD_ZERO_CHG:
3171d0180171SAndreas Gruenbacher drbd_err(device, "Configuration error.\n");
3172b411b363SPhilipp Reisner break;
3173b411b363SPhilipp Reisner case ASB_VIOLENTLY:
317469a22773SAndreas Gruenbacher rv = drbd_asb_recover_0p(peer_device);
3175b411b363SPhilipp Reisner break;
3176b411b363SPhilipp Reisner case ASB_DISCONNECT:
3177b411b363SPhilipp Reisner break;
3178b411b363SPhilipp Reisner case ASB_CALL_HELPER:
317969a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device);
3180b411b363SPhilipp Reisner if (hg == -1) {
3181bb437946SAndreas Gruenbacher enum drbd_state_rv rv2;
3182bb437946SAndreas Gruenbacher
3183b411b363SPhilipp Reisner /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3184b411b363SPhilipp Reisner * we might be here in C_WF_REPORT_PARAMS which is transient.
3185b411b363SPhilipp Reisner * we do not need to wait for the after state change work either. */
3186b30ab791SAndreas Gruenbacher rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3187bb437946SAndreas Gruenbacher if (rv2 != SS_SUCCESS) {
3188b30ab791SAndreas Gruenbacher drbd_khelper(device, "pri-lost-after-sb");
3189b411b363SPhilipp Reisner } else {
3190d0180171SAndreas Gruenbacher drbd_warn(device, "Successfully gave up primary role.\n");
3191b411b363SPhilipp Reisner rv = hg;
3192b411b363SPhilipp Reisner }
3193b411b363SPhilipp Reisner } else
3194b411b363SPhilipp Reisner rv = hg;
3195b411b363SPhilipp Reisner }
3196b411b363SPhilipp Reisner
3197b411b363SPhilipp Reisner return rv;
3198b411b363SPhilipp Reisner }
3199b411b363SPhilipp Reisner
/* Log one set of data-generation UUIDs (labelled by @text, e.g. "self"
 * or "peer") together with the associated bit count and flags. */
static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	if (uuid)
		drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
			  text,
			  (unsigned long long)uuid[UI_CURRENT],
			  (unsigned long long)uuid[UI_BITMAP],
			  (unsigned long long)uuid[UI_HISTORY_START],
			  (unsigned long long)uuid[UI_HISTORY_END],
			  (unsigned long long)bits,
			  (unsigned long long)flags);
	else
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
}
3216b411b363SPhilipp Reisner
3217b411b363SPhilipp Reisner /*
3218b411b363SPhilipp Reisner 100 after split brain try auto recover
3219b411b363SPhilipp Reisner 2 C_SYNC_SOURCE set BitMap
3220b411b363SPhilipp Reisner 1 C_SYNC_SOURCE use BitMap
3221b411b363SPhilipp Reisner 0 no Sync
3222b411b363SPhilipp Reisner -1 C_SYNC_TARGET use BitMap
3223b411b363SPhilipp Reisner -2 C_SYNC_TARGET set BitMap
3224b411b363SPhilipp Reisner -100 after split brain, disconnect
3225b411b363SPhilipp Reisner -1000 unrelated data
32264a23f264SPhilipp Reisner -1091 requires proto 91
32274a23f264SPhilipp Reisner -1096 requires proto 96
3228b411b363SPhilipp Reisner */
3229f2d3d75bSLars Ellenberg
drbd_uuid_compare(struct drbd_peer_device * const peer_device,enum drbd_role const peer_role,int * rule_nr)3230db445db1SChristoph Böhmwalder static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
3231db445db1SChristoph Böhmwalder enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3232b411b363SPhilipp Reisner {
3233db445db1SChristoph Böhmwalder struct drbd_connection *const connection = peer_device->connection;
3234db445db1SChristoph Böhmwalder struct drbd_device *device = peer_device->device;
3235b411b363SPhilipp Reisner u64 self, peer;
3236b411b363SPhilipp Reisner int i, j;
3237b411b363SPhilipp Reisner
3238b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3239b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3240b411b363SPhilipp Reisner
3241b411b363SPhilipp Reisner *rule_nr = 10;
3242b411b363SPhilipp Reisner if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3243b411b363SPhilipp Reisner return 0;
3244b411b363SPhilipp Reisner
3245b411b363SPhilipp Reisner *rule_nr = 20;
3246b411b363SPhilipp Reisner if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3247b411b363SPhilipp Reisner peer != UUID_JUST_CREATED)
3248b411b363SPhilipp Reisner return -2;
3249b411b363SPhilipp Reisner
3250b411b363SPhilipp Reisner *rule_nr = 30;
3251b411b363SPhilipp Reisner if (self != UUID_JUST_CREATED &&
3252b411b363SPhilipp Reisner (peer == UUID_JUST_CREATED || peer == (u64)0))
3253b411b363SPhilipp Reisner return 2;
3254b411b363SPhilipp Reisner
3255b411b363SPhilipp Reisner if (self == peer) {
3256b411b363SPhilipp Reisner int rct, dc; /* roles at crash time */
3257b411b363SPhilipp Reisner
3258b30ab791SAndreas Gruenbacher if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3259b411b363SPhilipp Reisner
326044a4d551SLars Ellenberg if (connection->agreed_pro_version < 91)
32614a23f264SPhilipp Reisner return -1091;
3262b411b363SPhilipp Reisner
3263b30ab791SAndreas Gruenbacher if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3264b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3265d0180171SAndreas Gruenbacher drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3266b30ab791SAndreas Gruenbacher drbd_uuid_move_history(device);
3267b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3268b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_BITMAP] = 0;
3269b411b363SPhilipp Reisner
3270b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3271b30ab791SAndreas Gruenbacher device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3272b411b363SPhilipp Reisner *rule_nr = 34;
3273b411b363SPhilipp Reisner } else {
3274d0180171SAndreas Gruenbacher drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3275b411b363SPhilipp Reisner *rule_nr = 36;
3276b411b363SPhilipp Reisner }
3277b411b363SPhilipp Reisner
3278b411b363SPhilipp Reisner return 1;
3279b411b363SPhilipp Reisner }
3280b411b363SPhilipp Reisner
3281b30ab791SAndreas Gruenbacher if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3282b411b363SPhilipp Reisner
328344a4d551SLars Ellenberg if (connection->agreed_pro_version < 91)
32844a23f264SPhilipp Reisner return -1091;
3285b411b363SPhilipp Reisner
3286b30ab791SAndreas Gruenbacher if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3287b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3288d0180171SAndreas Gruenbacher drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3289b411b363SPhilipp Reisner
3290b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3291b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3292b30ab791SAndreas Gruenbacher device->p_uuid[UI_BITMAP] = 0UL;
3293b411b363SPhilipp Reisner
3294b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3295b411b363SPhilipp Reisner *rule_nr = 35;
3296b411b363SPhilipp Reisner } else {
3297d0180171SAndreas Gruenbacher drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3298b411b363SPhilipp Reisner *rule_nr = 37;
3299b411b363SPhilipp Reisner }
3300b411b363SPhilipp Reisner
3301b411b363SPhilipp Reisner return -1;
3302b411b363SPhilipp Reisner }
3303b411b363SPhilipp Reisner
3304b411b363SPhilipp Reisner /* Common power [off|failure] */
3305b30ab791SAndreas Gruenbacher rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3306b30ab791SAndreas Gruenbacher (device->p_uuid[UI_FLAGS] & 2);
3307b411b363SPhilipp Reisner /* lowest bit is set when we were primary,
3308b411b363SPhilipp Reisner * next bit (weight 2) is set when peer was primary */
3309b411b363SPhilipp Reisner *rule_nr = 40;
3310b411b363SPhilipp Reisner
3311f2d3d75bSLars Ellenberg /* Neither has the "crashed primary" flag set,
3312f2d3d75bSLars Ellenberg * only a replication link hickup. */
3313f2d3d75bSLars Ellenberg if (rct == 0)
3314f2d3d75bSLars Ellenberg return 0;
3315f2d3d75bSLars Ellenberg
3316f2d3d75bSLars Ellenberg /* Current UUID equal and no bitmap uuid; does not necessarily
3317f2d3d75bSLars Ellenberg * mean this was a "simultaneous hard crash", maybe IO was
3318f2d3d75bSLars Ellenberg * frozen, so no UUID-bump happened.
3319f2d3d75bSLars Ellenberg * This is a protocol change, overload DRBD_FF_WSAME as flag
3320f2d3d75bSLars Ellenberg * for "new-enough" peer DRBD version. */
3321f2d3d75bSLars Ellenberg if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3322f2d3d75bSLars Ellenberg *rule_nr = 41;
3323f2d3d75bSLars Ellenberg if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3324f2d3d75bSLars Ellenberg drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3325f2d3d75bSLars Ellenberg return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3326f2d3d75bSLars Ellenberg }
3327f2d3d75bSLars Ellenberg if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3328f2d3d75bSLars Ellenberg /* At least one has the "crashed primary" bit set,
3329f2d3d75bSLars Ellenberg * both are primary now, but neither has rotated its UUIDs?
3330f2d3d75bSLars Ellenberg * "Can not happen." */
3331f2d3d75bSLars Ellenberg drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3332f2d3d75bSLars Ellenberg return -100;
3333f2d3d75bSLars Ellenberg }
3334f2d3d75bSLars Ellenberg if (device->state.role == R_PRIMARY)
3335f2d3d75bSLars Ellenberg return 1;
3336f2d3d75bSLars Ellenberg return -1;
3337f2d3d75bSLars Ellenberg }
3338f2d3d75bSLars Ellenberg
3339f2d3d75bSLars Ellenberg /* Both are secondary.
3340f2d3d75bSLars Ellenberg * Really looks like recovery from simultaneous hard crash.
3341f2d3d75bSLars Ellenberg * Check which had been primary before, and arbitrate. */
3342b411b363SPhilipp Reisner switch (rct) {
3343f2d3d75bSLars Ellenberg case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
3344b411b363SPhilipp Reisner case 1: /* self_pri && !peer_pri */ return 1;
3345b411b363SPhilipp Reisner case 2: /* !self_pri && peer_pri */ return -1;
3346b411b363SPhilipp Reisner case 3: /* self_pri && peer_pri */
334744a4d551SLars Ellenberg dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3348b411b363SPhilipp Reisner return dc ? -1 : 1;
3349b411b363SPhilipp Reisner }
3350b411b363SPhilipp Reisner }
3351b411b363SPhilipp Reisner
3352b411b363SPhilipp Reisner *rule_nr = 50;
3353b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3354b411b363SPhilipp Reisner if (self == peer)
3355b411b363SPhilipp Reisner return -1;
3356b411b363SPhilipp Reisner
3357b411b363SPhilipp Reisner *rule_nr = 51;
3358b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3359b411b363SPhilipp Reisner if (self == peer) {
336044a4d551SLars Ellenberg if (connection->agreed_pro_version < 96 ?
3361b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3362b30ab791SAndreas Gruenbacher (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3363b30ab791SAndreas Gruenbacher peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
3364b411b363SPhilipp Reisner /* The last P_SYNC_UUID did not get though. Undo the last start of
3365b411b363SPhilipp Reisner resync as sync source modifications of the peer's UUIDs. */
3366b411b363SPhilipp Reisner
336744a4d551SLars Ellenberg if (connection->agreed_pro_version < 91)
33684a23f264SPhilipp Reisner return -1091;
3369b411b363SPhilipp Reisner
3370b30ab791SAndreas Gruenbacher device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3371b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
33724a23f264SPhilipp Reisner
3373d0180171SAndreas Gruenbacher drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3374b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
33754a23f264SPhilipp Reisner
3376b411b363SPhilipp Reisner return -1;
3377b411b363SPhilipp Reisner }
3378b411b363SPhilipp Reisner }
3379b411b363SPhilipp Reisner
3380b411b363SPhilipp Reisner *rule_nr = 60;
3381b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3382b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3383b30ab791SAndreas Gruenbacher peer = device->p_uuid[i] & ~((u64)1);
3384b411b363SPhilipp Reisner if (self == peer)
3385b411b363SPhilipp Reisner return -2;
3386b411b363SPhilipp Reisner }
3387b411b363SPhilipp Reisner
3388b411b363SPhilipp Reisner *rule_nr = 70;
3389b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3390b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3391b411b363SPhilipp Reisner if (self == peer)
3392b411b363SPhilipp Reisner return 1;
3393b411b363SPhilipp Reisner
3394b411b363SPhilipp Reisner *rule_nr = 71;
3395b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3396b411b363SPhilipp Reisner if (self == peer) {
339744a4d551SLars Ellenberg if (connection->agreed_pro_version < 96 ?
3398b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3399b30ab791SAndreas Gruenbacher (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3400b30ab791SAndreas Gruenbacher self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
3401b411b363SPhilipp Reisner /* The last P_SYNC_UUID did not get though. Undo the last start of
3402b411b363SPhilipp Reisner resync as sync source modifications of our UUIDs. */
3403b411b363SPhilipp Reisner
340444a4d551SLars Ellenberg if (connection->agreed_pro_version < 91)
34054a23f264SPhilipp Reisner return -1091;
3406b411b363SPhilipp Reisner
3407b30ab791SAndreas Gruenbacher __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3408b30ab791SAndreas Gruenbacher __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3409b411b363SPhilipp Reisner
3410d0180171SAndreas Gruenbacher drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3411b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3412b30ab791SAndreas Gruenbacher device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3413b411b363SPhilipp Reisner
3414b411b363SPhilipp Reisner return 1;
3415b411b363SPhilipp Reisner }
3416b411b363SPhilipp Reisner }
3417b411b363SPhilipp Reisner
3418b411b363SPhilipp Reisner
3419b411b363SPhilipp Reisner *rule_nr = 80;
3420b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3421b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3422b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[i] & ~((u64)1);
3423b411b363SPhilipp Reisner if (self == peer)
3424b411b363SPhilipp Reisner return 2;
3425b411b363SPhilipp Reisner }
3426b411b363SPhilipp Reisner
3427b411b363SPhilipp Reisner *rule_nr = 90;
3428b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3429b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3430b411b363SPhilipp Reisner if (self == peer && self != ((u64)0))
3431b411b363SPhilipp Reisner return 100;
3432b411b363SPhilipp Reisner
3433b411b363SPhilipp Reisner *rule_nr = 100;
3434b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3435b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[i] & ~((u64)1);
3436b411b363SPhilipp Reisner for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3437b30ab791SAndreas Gruenbacher peer = device->p_uuid[j] & ~((u64)1);
3438b411b363SPhilipp Reisner if (self == peer)
3439b411b363SPhilipp Reisner return -100;
3440b411b363SPhilipp Reisner }
3441b411b363SPhilipp Reisner }
3442b411b363SPhilipp Reisner
3443b411b363SPhilipp Reisner return -1000;
3444b411b363SPhilipp Reisner }
3445b411b363SPhilipp Reisner
3446b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or
3447b411b363SPhilipp Reisner CONN_MASK (-1) on failure.
3448b411b363SPhilipp Reisner */
/*
 * drbd_sync_handshake() - decide the post-connect resync direction.
 * @peer_device: peer device this handshake is for
 * @peer_role:   role (primary/secondary) the peer reported
 * @peer_disk:   disk state the peer reported
 *
 * Compares our UUID set against the peer's (drbd_uuid_compare()) and maps
 * the verdict "hg" onto the next connection state:
 *   hg > 0  -> we become sync source  (C_WF_BITMAP_S)
 *   hg < 0  -> we become sync target  (C_WF_BITMAP_T)
 *   hg == 0 -> no resync needed       (C_CONNECTED)
 *   |hg| >= 2 forces a full sync (whole bitmap set).
 * Returns C_MASK on any unresolvable situation (unrelated data, unresolved
 * split brain, protocol too old, dry-run, would-be-inconsistent source, ...),
 * which makes the caller drop the connection.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	mydisk = device->state.disk;
	/* While attaching, the disk state to negotiate with is kept aside. */
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* Hold the uuid lock so dump and compare see one consistent UUID set. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	/* -1000: no common point in UUID history at all. */
	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* Below -0x10000: encoded "need at least protocol X with feature
	 * flags Y" requirement (see rule 41 in drbd_uuid_compare). */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* -1091 etc.: -(1000 + minimum required protocol version). */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side is inconsistent, disk states alone decide the
	 * direction; a previous split-brain/full-sync verdict (|hg|==2 or
	 * -100) additionally forces a full sync. */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	/* |hg| == 100: split brain detected; notify userspace helper. */
	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	/* Snapshot the relevant net_conf knobs under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* hg == 100: auto-resolvable split brain;
	 * hg == -100 with always_asbp: apply the after-sb policies anyway. */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		/* Pick the after-split-brain policy by number of primaries. */
		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: honor an explicit discard-my-data setting,
	 * which must be set on exactly one side (peer flag bit 0). */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
				     "Sync from %s node\n",
				     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	/* Sanity: an inconsistent disk cannot serve as sync source. */
	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* A primary with good data being told to become sync target is a
	 * role conflict; resolve per the rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			fallthrough;
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run mode: report what would happen, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* Full sync: mark every bit in the bitmap as out of sync. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED, NULL))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		/* No resync decided, yet the bitmap is not clean: only log. */
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3614b411b363SPhilipp Reisner
convert_after_sb(enum drbd_after_sb_p peer)3615f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3616b411b363SPhilipp Reisner {
3617b411b363SPhilipp Reisner /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
3618f179d76dSPhilipp Reisner if (peer == ASB_DISCARD_REMOTE)
3619f179d76dSPhilipp Reisner return ASB_DISCARD_LOCAL;
3620b411b363SPhilipp Reisner
3621b411b363SPhilipp Reisner /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
3622f179d76dSPhilipp Reisner if (peer == ASB_DISCARD_LOCAL)
3623f179d76dSPhilipp Reisner return ASB_DISCARD_REMOTE;
3624b411b363SPhilipp Reisner
3625b411b363SPhilipp Reisner /* everything else is valid if they are equal on both sides. */
3626f179d76dSPhilipp Reisner return peer;
3627b411b363SPhilipp Reisner }
3628b411b363SPhilipp Reisner
/*
 * receive_protocol() - handle a P_PROTOCOL / P_PROTOCOL_UPDATE packet.
 * @connection: connection the packet arrived on
 * @pi:         packet header info; pi->data points at struct p_protocol,
 *              pi->size is the remaining payload (the integrity alg name
 *              for protocol >= 87)
 *
 * Validates that the peer's connection settings match ours (for the initial
 * P_PROTOCOL only; P_PROTOCOL_UPDATE skips the compatibility checks),
 * allocates a new peer data-integrity transform plus digest buffers, and
 * atomically publishes an updated net_conf via RCU.
 *
 * Returns 0 on success, a negative error code on receive failure, or -EIO
 * after forcing the connection into C_DISCONNECTING on incompatibility or
 * allocation failure.
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Decode the fixed part of the packet (all fields big endian). */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name follows as payload. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* Force NUL termination of the received name. */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* Only the initial P_PROTOCOL carries settings that must match our
	 * local configuration; a later P_PROTOCOL_UPDATE is trusted. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* Compare against our net_conf under RCU protection. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		/* The after-sb policies are mirrored to the peer's view
		 * before comparing (discard local <-> discard remote). */
		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* discard-my-data may be set on at most one side. */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			/* NULL it so the error path's crypto_free_shash()
			 * does not operate on an ERR_PTR. */
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* Scratch buffers for incoming and verification digests. */
		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf)
		goto disconnect;

	/* Publish the updated configuration; data.mutex keeps senders out,
	 * conf_update serializes against local configuration changes. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity machinery; ownership of the tfm and
	 * digest buffers transfers to the connection here. */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* Free the old net_conf after a grace period; readers may still
	 * hold RCU references to it. */
	kvfree_rcu_mightsleep(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3776b411b363SPhilipp Reisner
3777b411b363SPhilipp Reisner /* helper function
3778b411b363SPhilipp Reisner * input: alg name, feature name
3779b411b363SPhilipp Reisner * return: NULL (alg name was "")
3780b411b363SPhilipp Reisner * ERR_PTR(error) if something goes wrong
3781b411b363SPhilipp Reisner * or the crypto hash ptr, if it worked out ok. */
drbd_crypto_alloc_digest_safe(const struct drbd_device * device,const char * alg,const char * name)37823d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe(
37833d0e6375SKees Cook const struct drbd_device *device,
3784b411b363SPhilipp Reisner const char *alg, const char *name)
3785b411b363SPhilipp Reisner {
37863d0e6375SKees Cook struct crypto_shash *tfm;
3787b411b363SPhilipp Reisner
3788b411b363SPhilipp Reisner if (!alg[0])
3789b411b363SPhilipp Reisner return NULL;
3790b411b363SPhilipp Reisner
37913d0e6375SKees Cook tfm = crypto_alloc_shash(alg, 0, 0);
3792b411b363SPhilipp Reisner if (IS_ERR(tfm)) {
3793d0180171SAndreas Gruenbacher drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3794b411b363SPhilipp Reisner alg, name, PTR_ERR(tfm));
3795b411b363SPhilipp Reisner return tfm;
3796b411b363SPhilipp Reisner }
3797b411b363SPhilipp Reisner return tfm;
3798b411b363SPhilipp Reisner }
3799b411b363SPhilipp Reisner
ignore_remaining_packet(struct drbd_connection * connection,struct packet_info * pi)3800bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3801b411b363SPhilipp Reisner {
3802bde89a9eSAndreas Gruenbacher void *buffer = connection->data.rbuf;
38034a76b161SAndreas Gruenbacher int size = pi->size;
38044a76b161SAndreas Gruenbacher
38054a76b161SAndreas Gruenbacher while (size) {
38064a76b161SAndreas Gruenbacher int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3807bde89a9eSAndreas Gruenbacher s = drbd_recv(connection, buffer, s);
38084a76b161SAndreas Gruenbacher if (s <= 0) {
38094a76b161SAndreas Gruenbacher if (s < 0)
38104a76b161SAndreas Gruenbacher return s;
38114a76b161SAndreas Gruenbacher break;
38124a76b161SAndreas Gruenbacher }
38134a76b161SAndreas Gruenbacher size -= s;
38144a76b161SAndreas Gruenbacher }
38154a76b161SAndreas Gruenbacher if (size)
38164a76b161SAndreas Gruenbacher return -EIO;
38174a76b161SAndreas Gruenbacher return 0;
38184a76b161SAndreas Gruenbacher }
38194a76b161SAndreas Gruenbacher
38204a76b161SAndreas Gruenbacher /*
38214a76b161SAndreas Gruenbacher * config_unknown_volume - device configuration command for unknown volume
38224a76b161SAndreas Gruenbacher *
38234a76b161SAndreas Gruenbacher * When a device is added to an existing connection, the node on which the
38244a76b161SAndreas Gruenbacher * device is added first will send configuration commands to its peer but the
38254a76b161SAndreas Gruenbacher * peer will not know about the device yet. It will warn and ignore these
38264a76b161SAndreas Gruenbacher * commands. Once the device is added on the second node, the second node will
38274a76b161SAndreas Gruenbacher * send the same device configuration commands, but in the other direction.
38284a76b161SAndreas Gruenbacher *
38294a76b161SAndreas Gruenbacher * (We can also end up here if drbd is misconfigured.)
38304a76b161SAndreas Gruenbacher */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Still drain the payload so the receive stream stays packet-aligned. */
	return ignore_remaining_packet(connection, pi);
}
38374a76b161SAndreas Gruenbacher
receive_SyncParam(struct drbd_connection * connection,struct packet_info * pi)3838bde89a9eSAndreas Gruenbacher static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
38394a76b161SAndreas Gruenbacher {
38409f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
3841b30ab791SAndreas Gruenbacher struct drbd_device *device;
3842e658983aSAndreas Gruenbacher struct p_rs_param_95 *p;
3843b411b363SPhilipp Reisner unsigned int header_size, data_size, exp_max_sz;
38443d0e6375SKees Cook struct crypto_shash *verify_tfm = NULL;
38453d0e6375SKees Cook struct crypto_shash *csums_tfm = NULL;
38462ec91e0eSPhilipp Reisner struct net_conf *old_net_conf, *new_net_conf = NULL;
3847813472ceSPhilipp Reisner struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3848bde89a9eSAndreas Gruenbacher const int apv = connection->agreed_pro_version;
3849813472ceSPhilipp Reisner struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
38506a365874SStephen Kitt unsigned int fifo_size = 0;
385182bc0194SAndreas Gruenbacher int err;
3852b411b363SPhilipp Reisner
38539f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
38549f4fe9adSAndreas Gruenbacher if (!peer_device)
3855bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi);
38569f4fe9adSAndreas Gruenbacher device = peer_device->device;
3857b411b363SPhilipp Reisner
3858b411b363SPhilipp Reisner exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3859b411b363SPhilipp Reisner : apv == 88 ? sizeof(struct p_rs_param)
3860b411b363SPhilipp Reisner + SHARED_SECRET_MAX
38618e26f9ccSPhilipp Reisner : apv <= 94 ? sizeof(struct p_rs_param_89)
38628e26f9ccSPhilipp Reisner : /* apv >= 95 */ sizeof(struct p_rs_param_95);
3863b411b363SPhilipp Reisner
3864e2857216SAndreas Gruenbacher if (pi->size > exp_max_sz) {
3865d0180171SAndreas Gruenbacher drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3866e2857216SAndreas Gruenbacher pi->size, exp_max_sz);
386782bc0194SAndreas Gruenbacher return -EIO;
3868b411b363SPhilipp Reisner }
3869b411b363SPhilipp Reisner
3870b411b363SPhilipp Reisner if (apv <= 88) {
3871e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param);
3872e2857216SAndreas Gruenbacher data_size = pi->size - header_size;
38738e26f9ccSPhilipp Reisner } else if (apv <= 94) {
3874e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_89);
3875e2857216SAndreas Gruenbacher data_size = pi->size - header_size;
38760b0ba1efSAndreas Gruenbacher D_ASSERT(device, data_size == 0);
38778e26f9ccSPhilipp Reisner } else {
3878e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_95);
3879e2857216SAndreas Gruenbacher data_size = pi->size - header_size;
38800b0ba1efSAndreas Gruenbacher D_ASSERT(device, data_size == 0);
3881b411b363SPhilipp Reisner }
3882b411b363SPhilipp Reisner
3883b411b363SPhilipp Reisner /* initialize verify_alg and csums_alg */
3884e658983aSAndreas Gruenbacher p = pi->data;
388552a0cab3SKees Cook BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
388652a0cab3SKees Cook memset(&p->algs, 0, sizeof(p->algs));
3887b411b363SPhilipp Reisner
38889f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, header_size);
388982bc0194SAndreas Gruenbacher if (err)
389082bc0194SAndreas Gruenbacher return err;
3891b411b363SPhilipp Reisner
38920500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update);
38939f4fe9adSAndreas Gruenbacher old_net_conf = peer_device->connection->net_conf;
3894b30ab791SAndreas Gruenbacher if (get_ldev(device)) {
3895daeda1ccSPhilipp Reisner new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3896daeda1ccSPhilipp Reisner if (!new_disk_conf) {
3897b30ab791SAndreas Gruenbacher put_ldev(device);
38980500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update);
3899d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new disk_conf failed\n");
3900daeda1ccSPhilipp Reisner return -ENOMEM;
3901f399002eSLars Ellenberg }
3902b411b363SPhilipp Reisner
3903b30ab791SAndreas Gruenbacher old_disk_conf = device->ldev->disk_conf;
3904daeda1ccSPhilipp Reisner *new_disk_conf = *old_disk_conf;
3905daeda1ccSPhilipp Reisner
39066394b935SAndreas Gruenbacher new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3907813472ceSPhilipp Reisner }
3908b411b363SPhilipp Reisner
3909b411b363SPhilipp Reisner if (apv >= 88) {
3910b411b363SPhilipp Reisner if (apv == 88) {
39115de73827SPhilipp Reisner if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3912d0180171SAndreas Gruenbacher drbd_err(device, "verify-alg of wrong size, "
39135de73827SPhilipp Reisner "peer wants %u, accepting only up to %u byte\n",
3914b411b363SPhilipp Reisner data_size, SHARED_SECRET_MAX);
3915813472ceSPhilipp Reisner goto reconnect;
3916b411b363SPhilipp Reisner }
3917b411b363SPhilipp Reisner
39189f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3919813472ceSPhilipp Reisner if (err)
3920813472ceSPhilipp Reisner goto reconnect;
3921b411b363SPhilipp Reisner /* we expect NUL terminated string */
3922b411b363SPhilipp Reisner /* but just in case someone tries to be evil */
39230b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3924b411b363SPhilipp Reisner p->verify_alg[data_size-1] = 0;
3925b411b363SPhilipp Reisner
3926b411b363SPhilipp Reisner } else /* apv >= 89 */ {
3927b411b363SPhilipp Reisner /* we still expect NUL terminated strings */
3928b411b363SPhilipp Reisner /* but just in case someone tries to be evil */
39290b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
39300b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3931b411b363SPhilipp Reisner p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3932b411b363SPhilipp Reisner p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3933b411b363SPhilipp Reisner }
3934b411b363SPhilipp Reisner
39352ec91e0eSPhilipp Reisner if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3936b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) {
3937d0180171SAndreas Gruenbacher drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
39382ec91e0eSPhilipp Reisner old_net_conf->verify_alg, p->verify_alg);
3939b411b363SPhilipp Reisner goto disconnect;
3940b411b363SPhilipp Reisner }
3941b30ab791SAndreas Gruenbacher verify_tfm = drbd_crypto_alloc_digest_safe(device,
3942b411b363SPhilipp Reisner p->verify_alg, "verify-alg");
3943b411b363SPhilipp Reisner if (IS_ERR(verify_tfm)) {
3944b411b363SPhilipp Reisner verify_tfm = NULL;
3945b411b363SPhilipp Reisner goto disconnect;
3946b411b363SPhilipp Reisner }
3947b411b363SPhilipp Reisner }
3948b411b363SPhilipp Reisner
39492ec91e0eSPhilipp Reisner if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3950b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) {
3951d0180171SAndreas Gruenbacher drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
39522ec91e0eSPhilipp Reisner old_net_conf->csums_alg, p->csums_alg);
3953b411b363SPhilipp Reisner goto disconnect;
3954b411b363SPhilipp Reisner }
3955b30ab791SAndreas Gruenbacher csums_tfm = drbd_crypto_alloc_digest_safe(device,
3956b411b363SPhilipp Reisner p->csums_alg, "csums-alg");
3957b411b363SPhilipp Reisner if (IS_ERR(csums_tfm)) {
3958b411b363SPhilipp Reisner csums_tfm = NULL;
3959b411b363SPhilipp Reisner goto disconnect;
3960b411b363SPhilipp Reisner }
3961b411b363SPhilipp Reisner }
3962b411b363SPhilipp Reisner
3963813472ceSPhilipp Reisner if (apv > 94 && new_disk_conf) {
3964daeda1ccSPhilipp Reisner new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3965daeda1ccSPhilipp Reisner new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3966daeda1ccSPhilipp Reisner new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3967daeda1ccSPhilipp Reisner new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3968778f271dSPhilipp Reisner
3969daeda1ccSPhilipp Reisner fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3970b30ab791SAndreas Gruenbacher if (fifo_size != device->rs_plan_s->size) {
3971813472ceSPhilipp Reisner new_plan = fifo_alloc(fifo_size);
3972813472ceSPhilipp Reisner if (!new_plan) {
3973d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of fifo_buffer failed");
3974b30ab791SAndreas Gruenbacher put_ldev(device);
3975778f271dSPhilipp Reisner goto disconnect;
3976778f271dSPhilipp Reisner }
3977778f271dSPhilipp Reisner }
39788e26f9ccSPhilipp Reisner }
3979b411b363SPhilipp Reisner
398091fd4dadSPhilipp Reisner if (verify_tfm || csums_tfm) {
39812ec91e0eSPhilipp Reisner new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
39828404e191SZhen Lei if (!new_net_conf)
398391fd4dadSPhilipp Reisner goto disconnect;
398491fd4dadSPhilipp Reisner
39852ec91e0eSPhilipp Reisner *new_net_conf = *old_net_conf;
398691fd4dadSPhilipp Reisner
3987b411b363SPhilipp Reisner if (verify_tfm) {
39882ec91e0eSPhilipp Reisner strcpy(new_net_conf->verify_alg, p->verify_alg);
39892ec91e0eSPhilipp Reisner new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
39903d0e6375SKees Cook crypto_free_shash(peer_device->connection->verify_tfm);
39919f4fe9adSAndreas Gruenbacher peer_device->connection->verify_tfm = verify_tfm;
3992d0180171SAndreas Gruenbacher drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3993b411b363SPhilipp Reisner }
3994b411b363SPhilipp Reisner if (csums_tfm) {
39952ec91e0eSPhilipp Reisner strcpy(new_net_conf->csums_alg, p->csums_alg);
39962ec91e0eSPhilipp Reisner new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
39973d0e6375SKees Cook crypto_free_shash(peer_device->connection->csums_tfm);
39989f4fe9adSAndreas Gruenbacher peer_device->connection->csums_tfm = csums_tfm;
3999d0180171SAndreas Gruenbacher drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
4000b411b363SPhilipp Reisner }
4001bde89a9eSAndreas Gruenbacher rcu_assign_pointer(connection->net_conf, new_net_conf);
4002778f271dSPhilipp Reisner }
4003b411b363SPhilipp Reisner }
4004b411b363SPhilipp Reisner
4005813472ceSPhilipp Reisner if (new_disk_conf) {
4006b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4007b30ab791SAndreas Gruenbacher put_ldev(device);
4008b411b363SPhilipp Reisner }
4009813472ceSPhilipp Reisner
4010813472ceSPhilipp Reisner if (new_plan) {
4011b30ab791SAndreas Gruenbacher old_plan = device->rs_plan_s;
4012b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->rs_plan_s, new_plan);
4013813472ceSPhilipp Reisner }
4014daeda1ccSPhilipp Reisner
40150500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update);
4016daeda1ccSPhilipp Reisner synchronize_rcu();
4017daeda1ccSPhilipp Reisner if (new_net_conf)
4018daeda1ccSPhilipp Reisner kfree(old_net_conf);
4019daeda1ccSPhilipp Reisner kfree(old_disk_conf);
4020813472ceSPhilipp Reisner kfree(old_plan);
4021daeda1ccSPhilipp Reisner
402282bc0194SAndreas Gruenbacher return 0;
4023b411b363SPhilipp Reisner
4024813472ceSPhilipp Reisner reconnect:
4025813472ceSPhilipp Reisner if (new_disk_conf) {
4026b30ab791SAndreas Gruenbacher put_ldev(device);
4027813472ceSPhilipp Reisner kfree(new_disk_conf);
4028813472ceSPhilipp Reisner }
40290500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update);
4030813472ceSPhilipp Reisner return -EIO;
4031813472ceSPhilipp Reisner
4032b411b363SPhilipp Reisner disconnect:
4033813472ceSPhilipp Reisner kfree(new_plan);
4034813472ceSPhilipp Reisner if (new_disk_conf) {
4035b30ab791SAndreas Gruenbacher put_ldev(device);
4036813472ceSPhilipp Reisner kfree(new_disk_conf);
4037813472ceSPhilipp Reisner }
40380500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update);
4039b411b363SPhilipp Reisner /* just for completeness: actually not needed,
4040b411b363SPhilipp Reisner * as this is not reached if csums_tfm was ok. */
40413d0e6375SKees Cook crypto_free_shash(csums_tfm);
4042b411b363SPhilipp Reisner /* but free the verify_tfm again, if csums_tfm did not work out */
40433d0e6375SKees Cook crypto_free_shash(verify_tfm);
40449f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
404582bc0194SAndreas Gruenbacher return -EIO;
4046b411b363SPhilipp Reisner }
4047b411b363SPhilipp Reisner
4048b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t diff;

	/* A zero value means "unknown" — nothing meaningful to compare. */
	if (a == 0 || b == 0)
		return;
	diff = (a > b) ? a - b : b - a;
	/* x >> 3 is x/8, i.e. 12.5% of either operand. */
	if (diff > (a >> 3) || diff > (b >> 3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
			  (unsigned long long)a, (unsigned long long)b);
}
4060b411b363SPhilipp Reisner
/* Handle a P_SIZES packet: record the peer's backing-device and user-set
 * sizes, negotiate/apply a new device size where allowed, and kick off a
 * resync after online growth if necessary.
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO on fatal
 * disagreement (connection is forced to C_DISCONNECTING first). */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* Queue-limit info is only on the wire if the WSAME feature was agreed. */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = get_capacity(device->vdisk);

	p_size = be64_to_cpu(p->d_size);	/* peer's backing device size */
	p_usize = be64_to_cpu(p->u_size);	/* peer's user-configured size */
	p_csize = be64_to_cpu(p->c_size);	/* peer's current device size */

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			/* Publish the peer's u_size via RCU: copy-update the
			 * disk_conf under conf_update, free the old copy after
			 * a grace period. */
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			kvfree_rcu_mightsleep(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				  (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore: the peer itself knows nothing (no size at all). */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	/* Remember the backing device size so later packets can detect a
	 * local size change (ldsc). */
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
		    ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
4231b411b363SPhilipp Reisner
receive_uuids(struct drbd_connection * connection,struct packet_info * pi)4232bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4233b411b363SPhilipp Reisner {
42349f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
4235b30ab791SAndreas Gruenbacher struct drbd_device *device;
4236e658983aSAndreas Gruenbacher struct p_uuids *p = pi->data;
4237b411b363SPhilipp Reisner u64 *p_uuid;
423862b0da3aSLars Ellenberg int i, updated_uuids = 0;
4239b411b363SPhilipp Reisner
42409f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
42419f4fe9adSAndreas Gruenbacher if (!peer_device)
4242bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi);
42439f4fe9adSAndreas Gruenbacher device = peer_device->device;
42444a76b161SAndreas Gruenbacher
4245365cf663SRoland Kammerer p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
42468404e191SZhen Lei if (!p_uuid)
4247063eacf8SJing Wang return false;
4248b411b363SPhilipp Reisner
4249b411b363SPhilipp Reisner for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4250b411b363SPhilipp Reisner p_uuid[i] = be64_to_cpu(p->uuid[i]);
4251b411b363SPhilipp Reisner
4252b30ab791SAndreas Gruenbacher kfree(device->p_uuid);
4253b30ab791SAndreas Gruenbacher device->p_uuid = p_uuid;
4254b411b363SPhilipp Reisner
4255b17b5960SLars Ellenberg if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
4256b30ab791SAndreas Gruenbacher device->state.disk < D_INCONSISTENT &&
4257b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY &&
4258b30ab791SAndreas Gruenbacher (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4259d0180171SAndreas Gruenbacher drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4260b30ab791SAndreas Gruenbacher (unsigned long long)device->ed_uuid);
42619f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
426282bc0194SAndreas Gruenbacher return -EIO;
4263b411b363SPhilipp Reisner }
4264b411b363SPhilipp Reisner
4265b30ab791SAndreas Gruenbacher if (get_ldev(device)) {
4266b411b363SPhilipp Reisner int skip_initial_sync =
4267b30ab791SAndreas Gruenbacher device->state.conn == C_CONNECTED &&
42689f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90 &&
4269b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4270b411b363SPhilipp Reisner (p_uuid[UI_FLAGS] & 8);
4271b411b363SPhilipp Reisner if (skip_initial_sync) {
4272d0180171SAndreas Gruenbacher drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4273b30ab791SAndreas Gruenbacher drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
427420ceb2b2SLars Ellenberg "clear_n_write from receive_uuids",
42758164dd6cSAndreas Gruenbacher BM_LOCKED_TEST_ALLOWED, NULL);
4276b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4277b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0);
4278b30ab791SAndreas Gruenbacher _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4279b411b363SPhilipp Reisner CS_VERBOSE, NULL);
4280b30ab791SAndreas Gruenbacher drbd_md_sync(device);
428162b0da3aSLars Ellenberg updated_uuids = 1;
4282b411b363SPhilipp Reisner }
4283b30ab791SAndreas Gruenbacher put_ldev(device);
4284b30ab791SAndreas Gruenbacher } else if (device->state.disk < D_INCONSISTENT &&
4285b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY) {
428618a50fa2SPhilipp Reisner /* I am a diskless primary, the peer just created a new current UUID
428718a50fa2SPhilipp Reisner for me. */
4288b30ab791SAndreas Gruenbacher updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4289b411b363SPhilipp Reisner }
4290b411b363SPhilipp Reisner
4291b411b363SPhilipp Reisner /* Before we test for the disk state, we should wait until an eventually
4292b411b363SPhilipp Reisner ongoing cluster wide state change is finished. That is important if
4293b411b363SPhilipp Reisner we are primary and are detaching from our disk. We need to see the
4294b411b363SPhilipp Reisner new disk state... */
4295b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex);
4296b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex);
4297b30ab791SAndreas Gruenbacher if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4298b30ab791SAndreas Gruenbacher updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
429962b0da3aSLars Ellenberg
430062b0da3aSLars Ellenberg if (updated_uuids)
4301b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "receiver updated UUIDs to");
4302b411b363SPhilipp Reisner
430382bc0194SAndreas Gruenbacher return 0;
4304b411b363SPhilipp Reisner }
4305b411b363SPhilipp Reisner
4306b411b363SPhilipp Reisner /**
4307b411b363SPhilipp Reisner * convert_state() - Converts the peer's view of the cluster state to our point of view
4308b411b363SPhilipp Reisner * @ps: The state as seen by the peer.
4309b411b363SPhilipp Reisner */
convert_state(union drbd_state ps)4310b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps)
4311b411b363SPhilipp Reisner {
4312b411b363SPhilipp Reisner union drbd_state ms;
4313b411b363SPhilipp Reisner
4314b411b363SPhilipp Reisner static enum drbd_conns c_tab[] = {
4315369bea63SPhilipp Reisner [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4316b411b363SPhilipp Reisner [C_CONNECTED] = C_CONNECTED,
4317b411b363SPhilipp Reisner
4318b411b363SPhilipp Reisner [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4319b411b363SPhilipp Reisner [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4320b411b363SPhilipp Reisner [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4321b411b363SPhilipp Reisner [C_VERIFY_S] = C_VERIFY_T,
4322b411b363SPhilipp Reisner [C_MASK] = C_MASK,
4323b411b363SPhilipp Reisner };
4324b411b363SPhilipp Reisner
4325b411b363SPhilipp Reisner ms.i = ps.i;
4326b411b363SPhilipp Reisner
4327b411b363SPhilipp Reisner ms.conn = c_tab[ps.conn];
4328b411b363SPhilipp Reisner ms.peer = ps.role;
4329b411b363SPhilipp Reisner ms.role = ps.peer;
4330b411b363SPhilipp Reisner ms.pdsk = ps.disk;
4331b411b363SPhilipp Reisner ms.disk = ps.pdsk;
4332b411b363SPhilipp Reisner ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4333b411b363SPhilipp Reisner
4334b411b363SPhilipp Reisner return ms;
4335b411b363SPhilipp Reisner }
4336b411b363SPhilipp Reisner
receive_req_state(struct drbd_connection * connection,struct packet_info * pi)4337bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4338b411b363SPhilipp Reisner {
43399f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
4340b30ab791SAndreas Gruenbacher struct drbd_device *device;
4341e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data;
4342b411b363SPhilipp Reisner union drbd_state mask, val;
4343bf885f8aSAndreas Gruenbacher enum drbd_state_rv rv;
4344b411b363SPhilipp Reisner
43459f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
43469f4fe9adSAndreas Gruenbacher if (!peer_device)
43474a76b161SAndreas Gruenbacher return -EIO;
43489f4fe9adSAndreas Gruenbacher device = peer_device->device;
43494a76b161SAndreas Gruenbacher
4350b411b363SPhilipp Reisner mask.i = be32_to_cpu(p->mask);
4351b411b363SPhilipp Reisner val.i = be32_to_cpu(p->val);
4352b411b363SPhilipp Reisner
43539f4fe9adSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4354b30ab791SAndreas Gruenbacher mutex_is_locked(device->state_mutex)) {
435569a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
435682bc0194SAndreas Gruenbacher return 0;
4357b411b363SPhilipp Reisner }
4358b411b363SPhilipp Reisner
4359b411b363SPhilipp Reisner mask = convert_state(mask);
4360b411b363SPhilipp Reisner val = convert_state(val);
4361b411b363SPhilipp Reisner
4362b30ab791SAndreas Gruenbacher rv = drbd_change_state(device, CS_VERBOSE, mask, val);
436369a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, rv);
4364047cd4a6SPhilipp Reisner
4365b30ab791SAndreas Gruenbacher drbd_md_sync(device);
4366b411b363SPhilipp Reisner
436782bc0194SAndreas Gruenbacher return 0;
4368b411b363SPhilipp Reisner }
4369b411b363SPhilipp Reisner
receive_req_conn_state(struct drbd_connection * connection,struct packet_info * pi)4370bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4371b411b363SPhilipp Reisner {
4372e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data;
4373dfafcc8aSPhilipp Reisner union drbd_state mask, val;
4374dfafcc8aSPhilipp Reisner enum drbd_state_rv rv;
4375dfafcc8aSPhilipp Reisner
4376dfafcc8aSPhilipp Reisner mask.i = be32_to_cpu(p->mask);
4377dfafcc8aSPhilipp Reisner val.i = be32_to_cpu(p->val);
4378dfafcc8aSPhilipp Reisner
4379bde89a9eSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4380bde89a9eSAndreas Gruenbacher mutex_is_locked(&connection->cstate_mutex)) {
4381bde89a9eSAndreas Gruenbacher conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
438282bc0194SAndreas Gruenbacher return 0;
4383dfafcc8aSPhilipp Reisner }
4384dfafcc8aSPhilipp Reisner
4385dfafcc8aSPhilipp Reisner mask = convert_state(mask);
4386dfafcc8aSPhilipp Reisner val = convert_state(val);
4387dfafcc8aSPhilipp Reisner
4388bde89a9eSAndreas Gruenbacher rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4389bde89a9eSAndreas Gruenbacher conn_send_sr_reply(connection, rv);
4390dfafcc8aSPhilipp Reisner
439182bc0194SAndreas Gruenbacher return 0;
4392dfafcc8aSPhilipp Reisner }
4393dfafcc8aSPhilipp Reisner
/*
 * receive_state() - process a P_STATE packet: the peer reports its state.
 *
 * Merges the peer's reported state into our own (peer role, peer disk,
 * peer "is paused" flags), decides whether a resync handshake is needed,
 * and commits the combined state change under the resource request lock.
 *
 * Returns 0 on success, or a negative error code when the connection must
 * be torn down.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* Peer is still attaching: deduce its effective disk state
		 * from the "inconsistent" flag in the UUID flags it sent. */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:	/* re-sample our state if it changed while we dropped the lock */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(peer_device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(peer_device);
		drbd_resync_finished(peer_device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* TODO:
	 * if (primary and diskless and peer uuid != effective uuid)
	 *     abort attach on peer;
	 *
	 * If this node does not have good data, was already connected, but
	 * the peer did a late attach only now, trying to "negotiate" with me,
	 * AND I am currently Primary, possibly frozen, with some specific
	 * "effective" uuid, this should never be reached, really, because
	 * we first send the uuids, then the current state.
	 *
	 * In this scenario, we already dropped the connection hard
	 * when we received the unsuitable uuids (receive_uuids()).
	 *
	 * Should we want to change this, that is: not drop the connection in
	 * receive_uuids() already, then we would need to add a branch here
	 * that aborts the attach of "unsuitable uuids" on the peer in case
	 * this node is currently Diskless Primary.
	 */

	/* Decide whether this state report should trigger a resync
	 * handshake; only possible if we (still) have local meta data. */
	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --force */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK signals that the handshake found no usable common
		 * history; sort out who has to detach, or disconnect. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Commit phase: re-take the lock and make sure our state did not
	 * change behind our back; otherwise start over. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4590b411b363SPhilipp Reisner
receive_sync_uuid(struct drbd_connection * connection,struct packet_info * pi)4591bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
4592b411b363SPhilipp Reisner {
45939f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
4594b30ab791SAndreas Gruenbacher struct drbd_device *device;
4595e658983aSAndreas Gruenbacher struct p_rs_uuid *p = pi->data;
45964a76b161SAndreas Gruenbacher
45979f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
45989f4fe9adSAndreas Gruenbacher if (!peer_device)
45994a76b161SAndreas Gruenbacher return -EIO;
46009f4fe9adSAndreas Gruenbacher device = peer_device->device;
4601b411b363SPhilipp Reisner
4602b30ab791SAndreas Gruenbacher wait_event(device->misc_wait,
4603b30ab791SAndreas Gruenbacher device->state.conn == C_WF_SYNC_UUID ||
4604b30ab791SAndreas Gruenbacher device->state.conn == C_BEHIND ||
4605b30ab791SAndreas Gruenbacher device->state.conn < C_CONNECTED ||
4606b30ab791SAndreas Gruenbacher device->state.disk < D_NEGOTIATING);
4607b411b363SPhilipp Reisner
46080b0ba1efSAndreas Gruenbacher /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
4609b411b363SPhilipp Reisner
4610b411b363SPhilipp Reisner /* Here the _drbd_uuid_ functions are right, current should
4611b411b363SPhilipp Reisner _not_ be rotated into the history */
4612b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_NEGOTIATING)) {
4613b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4614b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0UL);
4615b411b363SPhilipp Reisner
4616b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "updated sync uuid");
4617b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_TARGET);
4618b411b363SPhilipp Reisner
4619b30ab791SAndreas Gruenbacher put_ldev(device);
4620b411b363SPhilipp Reisner } else
4621d0180171SAndreas Gruenbacher drbd_err(device, "Ignoring SyncUUID packet!\n");
4622b411b363SPhilipp Reisner
462382bc0194SAndreas Gruenbacher return 0;
4624b411b363SPhilipp Reisner }
4625b411b363SPhilipp Reisner
46269b48ff07SLee Jones /*
46272c46407dSAndreas Gruenbacher * receive_bitmap_plain
46282c46407dSAndreas Gruenbacher *
46292c46407dSAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error
46302c46407dSAndreas Gruenbacher * code upon failure.
46312c46407dSAndreas Gruenbacher */
46322c46407dSAndreas Gruenbacher static int
receive_bitmap_plain(struct drbd_peer_device * peer_device,unsigned int size,unsigned long * p,struct bm_xfer_ctx * c)463369a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
4634e658983aSAndreas Gruenbacher unsigned long *p, struct bm_xfer_ctx *c)
4635b411b363SPhilipp Reisner {
463650d0b1adSAndreas Gruenbacher unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
463769a22773SAndreas Gruenbacher drbd_header_size(peer_device->connection);
4638e658983aSAndreas Gruenbacher unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
463950d0b1adSAndreas Gruenbacher c->bm_words - c->word_offset);
4640e658983aSAndreas Gruenbacher unsigned int want = num_words * sizeof(*p);
46412c46407dSAndreas Gruenbacher int err;
4642b411b363SPhilipp Reisner
464350d0b1adSAndreas Gruenbacher if (want != size) {
464469a22773SAndreas Gruenbacher drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
46452c46407dSAndreas Gruenbacher return -EIO;
4646b411b363SPhilipp Reisner }
4647b411b363SPhilipp Reisner if (want == 0)
46482c46407dSAndreas Gruenbacher return 0;
464969a22773SAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, want);
465082bc0194SAndreas Gruenbacher if (err)
46512c46407dSAndreas Gruenbacher return err;
4652b411b363SPhilipp Reisner
465369a22773SAndreas Gruenbacher drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
4654b411b363SPhilipp Reisner
4655b411b363SPhilipp Reisner c->word_offset += num_words;
4656b411b363SPhilipp Reisner c->bit_offset = c->word_offset * BITS_PER_LONG;
4657b411b363SPhilipp Reisner if (c->bit_offset > c->bm_bits)
4658b411b363SPhilipp Reisner c->bit_offset = c->bm_bits;
4659b411b363SPhilipp Reisner
46602c46407dSAndreas Gruenbacher return 1;
4661b411b363SPhilipp Reisner }
4662b411b363SPhilipp Reisner
dcbp_get_code(struct p_compressed_bm * p)4663a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4664a02d1240SAndreas Gruenbacher {
4665a02d1240SAndreas Gruenbacher return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4666a02d1240SAndreas Gruenbacher }
4667a02d1240SAndreas Gruenbacher
dcbp_get_start(struct p_compressed_bm * p)4668a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p)
4669a02d1240SAndreas Gruenbacher {
4670a02d1240SAndreas Gruenbacher return (p->encoding & 0x80) != 0;
4671a02d1240SAndreas Gruenbacher }
4672a02d1240SAndreas Gruenbacher
dcbp_get_pad_bits(struct p_compressed_bm * p)4673a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4674a02d1240SAndreas Gruenbacher {
4675a02d1240SAndreas Gruenbacher return (p->encoding >> 4) & 0x7;
4676a02d1240SAndreas Gruenbacher }
4677a02d1240SAndreas Gruenbacher
/*
 * recv_bm_rle_bits
 *
 * Decode one VLI run-length encoded bitmap chunk and set the bits of the
 * "set" runs in our bitmap.  Runs alternate between cleared and set bits;
 * @toggle tracks which kind the current run is.  Decoding works on a
 * sliding 64-bit look-ahead window that is refilled from the bit stream
 * after each consumed code.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* next up-to-64 bits of the encoded stream */
	u64 rl;		/* decoded run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of current run */
	unsigned long e;			/* end bit of current run */
	int toggle = dcbp_get_start(p);		/* 1: run of set bits */
	int have;	/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* run must not extend past the end of our bitmap */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the decoded code must fit in what we actually had buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the window with the next bits from the stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 0 once the whole bitmap has been received, 1 for "more chunks" */
	return (s != c->bm_bits);
}
4746b411b363SPhilipp Reisner
47479b48ff07SLee Jones /*
47482c46407dSAndreas Gruenbacher * decode_bitmap_c
47492c46407dSAndreas Gruenbacher *
47502c46407dSAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error
47512c46407dSAndreas Gruenbacher * code upon failure.
47522c46407dSAndreas Gruenbacher */
47532c46407dSAndreas Gruenbacher static int
decode_bitmap_c(struct drbd_peer_device * peer_device,struct p_compressed_bm * p,struct bm_xfer_ctx * c,unsigned int len)475469a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device,
4755b411b363SPhilipp Reisner struct p_compressed_bm *p,
4756c6d25cfeSPhilipp Reisner struct bm_xfer_ctx *c,
4757c6d25cfeSPhilipp Reisner unsigned int len)
4758b411b363SPhilipp Reisner {
4759a02d1240SAndreas Gruenbacher if (dcbp_get_code(p) == RLE_VLI_Bits)
476069a22773SAndreas Gruenbacher return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
4761b411b363SPhilipp Reisner
4762b411b363SPhilipp Reisner /* other variants had been implemented for evaluation,
4763b411b363SPhilipp Reisner * but have been dropped as this one turned out to be "best"
4764b411b363SPhilipp Reisner * during all our tests. */
4765b411b363SPhilipp Reisner
476669a22773SAndreas Gruenbacher drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
476769a22773SAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
47682c46407dSAndreas Gruenbacher return -EIO;
4769b411b363SPhilipp Reisner }
4770b411b363SPhilipp Reisner
INFO_bm_xfer_stats(struct drbd_peer_device * peer_device,const char * direction,struct bm_xfer_ctx * c)47715e54c2a6SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
4772b411b363SPhilipp Reisner const char *direction, struct bm_xfer_ctx *c)
4773b411b363SPhilipp Reisner {
4774b411b363SPhilipp Reisner /* what would it take to transfer it "plaintext" */
47755e54c2a6SAndreas Gruenbacher unsigned int header_size = drbd_header_size(peer_device->connection);
477650d0b1adSAndreas Gruenbacher unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
477750d0b1adSAndreas Gruenbacher unsigned int plain =
477850d0b1adSAndreas Gruenbacher header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
477950d0b1adSAndreas Gruenbacher c->bm_words * sizeof(unsigned long);
478050d0b1adSAndreas Gruenbacher unsigned int total = c->bytes[0] + c->bytes[1];
478150d0b1adSAndreas Gruenbacher unsigned int r;
4782b411b363SPhilipp Reisner
4783b411b363SPhilipp Reisner /* total can not be zero. but just in case: */
4784b411b363SPhilipp Reisner if (total == 0)
4785b411b363SPhilipp Reisner return;
4786b411b363SPhilipp Reisner
4787b411b363SPhilipp Reisner /* don't report if not compressed */
4788b411b363SPhilipp Reisner if (total >= plain)
4789b411b363SPhilipp Reisner return;
4790b411b363SPhilipp Reisner
4791b411b363SPhilipp Reisner /* total < plain. check for overflow, still */
4792b411b363SPhilipp Reisner r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4793b411b363SPhilipp Reisner : (1000 * total / plain);
4794b411b363SPhilipp Reisner
4795b411b363SPhilipp Reisner if (r > 1000)
4796b411b363SPhilipp Reisner r = 1000;
4797b411b363SPhilipp Reisner
4798b411b363SPhilipp Reisner r = 1000 - r;
47995e54c2a6SAndreas Gruenbacher drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4800b411b363SPhilipp Reisner "total %u; compression: %u.%u%%\n",
4801b411b363SPhilipp Reisner direction,
4802b411b363SPhilipp Reisner c->bytes[1], c->packets[1],
4803b411b363SPhilipp Reisner c->bytes[0], c->packets[0],
4804b411b363SPhilipp Reisner total, r/10, r % 10);
4805b411b363SPhilipp Reisner }
4806b411b363SPhilipp Reisner
4807b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher,
4808b411b363SPhilipp Reisner it does not matter if the process it in 32 bit chunks or 64 bit
4809b411b363SPhilipp Reisner chunks as long as it is little endian. (Understand it as byte stream,
4810b411b363SPhilipp Reisner beginning with the lowest byte...) If we would use big endian
4811b411b363SPhilipp Reisner we would need to process it from the highest address to the lowest,
4812b411b363SPhilipp Reisner in order to be agnostic to the 32 vs 64 bits issue.
4813b411b363SPhilipp Reisner
4814b411b363SPhilipp Reisner returns 0 on failure, 1 if we successfully received it. */
receive_bitmap(struct drbd_connection * connection,struct packet_info * pi)4815bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
4816b411b363SPhilipp Reisner {
48179f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
4818b30ab791SAndreas Gruenbacher struct drbd_device *device;
4819b411b363SPhilipp Reisner struct bm_xfer_ctx c;
48202c46407dSAndreas Gruenbacher int err;
48214a76b161SAndreas Gruenbacher
48229f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
48239f4fe9adSAndreas Gruenbacher if (!peer_device)
48244a76b161SAndreas Gruenbacher return -EIO;
48259f4fe9adSAndreas Gruenbacher device = peer_device->device;
4826b411b363SPhilipp Reisner
4827b30ab791SAndreas Gruenbacher drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
482820ceb2b2SLars Ellenberg /* you are supposed to send additional out-of-sync information
482920ceb2b2SLars Ellenberg * if you actually set bits during this phase */
4830b411b363SPhilipp Reisner
4831b411b363SPhilipp Reisner c = (struct bm_xfer_ctx) {
4832b30ab791SAndreas Gruenbacher .bm_bits = drbd_bm_bits(device),
4833b30ab791SAndreas Gruenbacher .bm_words = drbd_bm_words(device),
4834b411b363SPhilipp Reisner };
4835b411b363SPhilipp Reisner
48362c46407dSAndreas Gruenbacher for(;;) {
4837e658983aSAndreas Gruenbacher if (pi->cmd == P_BITMAP)
483869a22773SAndreas Gruenbacher err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
4839e658983aSAndreas Gruenbacher else if (pi->cmd == P_COMPRESSED_BITMAP) {
4840b411b363SPhilipp Reisner /* MAYBE: sanity check that we speak proto >= 90,
4841b411b363SPhilipp Reisner * and the feature is enabled! */
4842e658983aSAndreas Gruenbacher struct p_compressed_bm *p = pi->data;
4843b411b363SPhilipp Reisner
4844bde89a9eSAndreas Gruenbacher if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
4845d0180171SAndreas Gruenbacher drbd_err(device, "ReportCBitmap packet too large\n");
484682bc0194SAndreas Gruenbacher err = -EIO;
4847b411b363SPhilipp Reisner goto out;
4848b411b363SPhilipp Reisner }
4849e658983aSAndreas Gruenbacher if (pi->size <= sizeof(*p)) {
4850d0180171SAndreas Gruenbacher drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
485182bc0194SAndreas Gruenbacher err = -EIO;
485278fcbdaeSAndreas Gruenbacher goto out;
4853b411b363SPhilipp Reisner }
48549f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, pi->size);
4855e658983aSAndreas Gruenbacher if (err)
4856e658983aSAndreas Gruenbacher goto out;
485769a22773SAndreas Gruenbacher err = decode_bitmap_c(peer_device, p, &c, pi->size);
4858b411b363SPhilipp Reisner } else {
4859d0180171SAndreas Gruenbacher drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
486082bc0194SAndreas Gruenbacher err = -EIO;
4861b411b363SPhilipp Reisner goto out;
4862b411b363SPhilipp Reisner }
4863b411b363SPhilipp Reisner
4864e2857216SAndreas Gruenbacher c.packets[pi->cmd == P_BITMAP]++;
4865bde89a9eSAndreas Gruenbacher c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
4866b411b363SPhilipp Reisner
48672c46407dSAndreas Gruenbacher if (err <= 0) {
48682c46407dSAndreas Gruenbacher if (err < 0)
48692c46407dSAndreas Gruenbacher goto out;
4870b411b363SPhilipp Reisner break;
48712c46407dSAndreas Gruenbacher }
48729f4fe9adSAndreas Gruenbacher err = drbd_recv_header(peer_device->connection, pi);
487382bc0194SAndreas Gruenbacher if (err)
4874b411b363SPhilipp Reisner goto out;
48752c46407dSAndreas Gruenbacher }
4876b411b363SPhilipp Reisner
48775e54c2a6SAndreas Gruenbacher INFO_bm_xfer_stats(peer_device, "receive", &c);
4878b411b363SPhilipp Reisner
4879b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_BITMAP_T) {
4880de1f8e4aSAndreas Gruenbacher enum drbd_state_rv rv;
4881de1f8e4aSAndreas Gruenbacher
48828164dd6cSAndreas Gruenbacher err = drbd_send_bitmap(device, peer_device);
488382bc0194SAndreas Gruenbacher if (err)
4884b411b363SPhilipp Reisner goto out;
4885b411b363SPhilipp Reisner /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
4886b30ab791SAndreas Gruenbacher rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
48870b0ba1efSAndreas Gruenbacher D_ASSERT(device, rv == SS_SUCCESS);
4888b30ab791SAndreas Gruenbacher } else if (device->state.conn != C_WF_BITMAP_S) {
4889b411b363SPhilipp Reisner /* admin may have requested C_DISCONNECTING,
4890b411b363SPhilipp Reisner * other threads may have noticed network errors */
4891d0180171SAndreas Gruenbacher drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
4892b30ab791SAndreas Gruenbacher drbd_conn_str(device->state.conn));
4893b411b363SPhilipp Reisner }
489482bc0194SAndreas Gruenbacher err = 0;
4895b411b363SPhilipp Reisner
4896b411b363SPhilipp Reisner out:
4897b30ab791SAndreas Gruenbacher drbd_bm_unlock(device);
4898b30ab791SAndreas Gruenbacher if (!err && device->state.conn == C_WF_BITMAP_S)
4899b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_SOURCE);
490082bc0194SAndreas Gruenbacher return err;
4901b411b363SPhilipp Reisner }
4902b411b363SPhilipp Reisner
/* Handler for unknown-but-optional packet types: log a warning and
 * drain the packet's remaining payload from the socket so the stream
 * stays in sync. Returns the result of ignore_remaining_packet(). */
static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		  pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}
4910b411b363SPhilipp Reisner
receive_UnplugRemote(struct drbd_connection * connection,struct packet_info * pi)4911bde89a9eSAndreas Gruenbacher static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
4912b411b363SPhilipp Reisner {
4913b411b363SPhilipp Reisner /* Make sure we've acked all the TCP data associated
4914b411b363SPhilipp Reisner * with the data requests being unplugged */
4915ddd061b8SChristoph Hellwig tcp_sock_set_quickack(connection->data.socket->sk, 2);
491682bc0194SAndreas Gruenbacher return 0;
4917b411b363SPhilipp Reisner }
4918b411b363SPhilipp Reisner
receive_out_of_sync(struct drbd_connection * connection,struct packet_info * pi)4919bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
492073a01a18SPhilipp Reisner {
49219f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
4922b30ab791SAndreas Gruenbacher struct drbd_device *device;
4923e658983aSAndreas Gruenbacher struct p_block_desc *p = pi->data;
49244a76b161SAndreas Gruenbacher
49259f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
49269f4fe9adSAndreas Gruenbacher if (!peer_device)
49274a76b161SAndreas Gruenbacher return -EIO;
49289f4fe9adSAndreas Gruenbacher device = peer_device->device;
492973a01a18SPhilipp Reisner
4930b30ab791SAndreas Gruenbacher switch (device->state.conn) {
4931f735e363SLars Ellenberg case C_WF_SYNC_UUID:
4932f735e363SLars Ellenberg case C_WF_BITMAP_T:
4933f735e363SLars Ellenberg case C_BEHIND:
4934f735e363SLars Ellenberg break;
4935f735e363SLars Ellenberg default:
4936d0180171SAndreas Gruenbacher drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4937b30ab791SAndreas Gruenbacher drbd_conn_str(device->state.conn));
4938f735e363SLars Ellenberg }
4939f735e363SLars Ellenberg
49400d11f3cfSChristoph Böhmwalder drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
494173a01a18SPhilipp Reisner
494282bc0194SAndreas Gruenbacher return 0;
494373a01a18SPhilipp Reisner }
494473a01a18SPhilipp Reisner
/* Handle P_RS_DEALLOCATED: the peer reports that a resync block is
 * deallocated (thin provisioning), so instead of receiving data we
 * submit a matching REQ_OP_DISCARD to our local backing device. On
 * local-disk unavailability or submit failure, a P_NEG_ACK is sent. */
static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(peer_device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;

		/* page count 0: the request carries only (sector, size),
		 * no data payload. */
		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->opf = REQ_OP_DISCARD;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		/* NOTE(review): this adds pi->size while the rs_sect_in
		 * accounting at the end uses the decoded "size" — confirm
		 * pi->size is still meaningful here (payload may already
		 * have been consumed by the header parsing). */
		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(peer_req);

		if (err) {
			/* Submission failed: unhook and free the request,
			 * then take the NEG_ACK path below. err is reset so
			 * the caller does not tear down the connection. */
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);

		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
		   as well as drbd_rs_complete_io() */
	} else {
	fail:
		/* No local disk (or submit failed): tell the peer we could
		 * not apply the deallocation for this block. */
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}
5010700ca8c0SPhilipp Reisner
/* One entry of the data-socket packet dispatch table (drbd_cmd_handler).
 * drbdd() reads pkt_size bytes of sub-header up front; a packet with
 * more than pkt_size bytes is rejected unless expect_payload is set. */
struct data_cmd {
	int expect_payload;	/* nonzero: pi.size may exceed pkt_size */
	unsigned int pkt_size;	/* fixed sub-header size read before fn() */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
5016b411b363SPhilipp Reisner
/* Data-socket dispatch table, indexed by packet command (pi.cmd).
 * Entries with pkt_size == 0 (e.g. the bitmap and sync-param handlers)
 * read their variable-sized payload themselves inside fn(). */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
};
504702918be2SPhilipp Reisner
drbdd(struct drbd_connection * connection)5048bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection)
5049b411b363SPhilipp Reisner {
505077351055SPhilipp Reisner struct packet_info pi;
505102918be2SPhilipp Reisner size_t shs; /* sub header size */
505282bc0194SAndreas Gruenbacher int err;
5053b411b363SPhilipp Reisner
5054bde89a9eSAndreas Gruenbacher while (get_t_state(&connection->receiver) == RUNNING) {
50559104d31aSLars Ellenberg struct data_cmd const *cmd;
5056deebe195SAndreas Gruenbacher
5057bde89a9eSAndreas Gruenbacher drbd_thread_current_set_cpu(&connection->receiver);
5058c51a0ef3SLars Ellenberg update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
5059c51a0ef3SLars Ellenberg if (drbd_recv_header_maybe_unplug(connection, &pi))
506002918be2SPhilipp Reisner goto err_out;
506102918be2SPhilipp Reisner
5062deebe195SAndreas Gruenbacher cmd = &drbd_cmd_handler[pi.cmd];
50634a76b161SAndreas Gruenbacher if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
50641ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected data packet %s (0x%04x)",
50652fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd);
506602918be2SPhilipp Reisner goto err_out;
50670b33a916SLars Ellenberg }
5068b411b363SPhilipp Reisner
5069e658983aSAndreas Gruenbacher shs = cmd->pkt_size;
50709104d31aSLars Ellenberg if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
50719104d31aSLars Ellenberg shs += sizeof(struct o_qlim);
5072e658983aSAndreas Gruenbacher if (pi.size > shs && !cmd->expect_payload) {
50731ec861ebSAndreas Gruenbacher drbd_err(connection, "No payload expected %s l:%d\n",
50742fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.size);
5075c13f7e1aSLars Ellenberg goto err_out;
5076c13f7e1aSLars Ellenberg }
50779104d31aSLars Ellenberg if (pi.size < shs) {
50789104d31aSLars Ellenberg drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
50799104d31aSLars Ellenberg cmdname(pi.cmd), (int)shs, pi.size);
50809104d31aSLars Ellenberg goto err_out;
50819104d31aSLars Ellenberg }
5082c13f7e1aSLars Ellenberg
5083c13f7e1aSLars Ellenberg if (shs) {
5084944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_recv_all_warn);
5085bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, pi.data, shs);
5086a5c31904SAndreas Gruenbacher if (err)
508702918be2SPhilipp Reisner goto err_out;
5088e2857216SAndreas Gruenbacher pi.size -= shs;
5089b411b363SPhilipp Reisner }
509002918be2SPhilipp Reisner
5091944410e9SLars Ellenberg update_receiver_timing_details(connection, cmd->fn);
5092bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi);
50934a76b161SAndreas Gruenbacher if (err) {
50941ec861ebSAndreas Gruenbacher drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
50959f5bdc33SAndreas Gruenbacher cmdname(pi.cmd), err, pi.size);
509602918be2SPhilipp Reisner goto err_out;
509702918be2SPhilipp Reisner }
509802918be2SPhilipp Reisner }
509982bc0194SAndreas Gruenbacher return;
510002918be2SPhilipp Reisner
510102918be2SPhilipp Reisner err_out:
5102bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
5103b411b363SPhilipp Reisner }
5104b411b363SPhilipp Reisner
/* Tear down a lost connection: stop the ack receiver and ack sender,
 * close the sockets, run per-volume cleanup via drbd_disconnected(),
 * and move the connection state to C_UNCONNECTED (or all the way to
 * C_STANDALONE if the admin requested a disconnect). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* Per-volume cleanup. The RCU read lock is dropped around the
	 * drbd_disconnected() call; the device is kept alive via its kref
	 * while unlocked. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* Primary with a peer disk in unknown-or-worse state: try to
	 * outdate the peer asynchronously (fencing). */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
5161360cc740SPhilipp Reisner
/* Per-volume cleanup after connection loss: wait for in-flight peer
 * requests to drain, cancel resync bookkeeping, flush the sender work
 * queue, clear the transfer log, and write out metadata and bitmap.
 * Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* Stop the resync timer, then run its handler once more by hand
	 * so pending timer work is not simply dropped. */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* Forget the peer's UUIDs; a reconnect will resend them. */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
5247b411b363SPhilipp Reisner
5248b411b363SPhilipp Reisner /*
5249b411b363SPhilipp Reisner * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5250b411b363SPhilipp Reisner * we can agree on is stored in agreed_pro_version.
5251b411b363SPhilipp Reisner *
5252b411b363SPhilipp Reisner * feature flags and the reserved array should be enough room for future
5253b411b363SPhilipp Reisner * enhancements of the handshake protocol, and possible plugins...
5254b411b363SPhilipp Reisner *
5255b411b363SPhilipp Reisner * for now, they are expected to be zero, but ignored.
5256b411b363SPhilipp Reisner */
drbd_send_features(struct drbd_connection * connection)5257bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection)
5258b411b363SPhilipp Reisner {
52599f5bdc33SAndreas Gruenbacher struct drbd_socket *sock;
52609f5bdc33SAndreas Gruenbacher struct p_connection_features *p;
5261b411b363SPhilipp Reisner
5262bde89a9eSAndreas Gruenbacher sock = &connection->data;
5263bde89a9eSAndreas Gruenbacher p = conn_prepare_command(connection, sock);
52649f5bdc33SAndreas Gruenbacher if (!p)
5265e8d17b01SAndreas Gruenbacher return -EIO;
5266b411b363SPhilipp Reisner memset(p, 0, sizeof(*p));
5267b411b363SPhilipp Reisner p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5268b411b363SPhilipp Reisner p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
526920c68fdeSLars Ellenberg p->feature_flags = cpu_to_be32(PRO_FEATURES);
5270bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
5271b411b363SPhilipp Reisner }
5272b411b363SPhilipp Reisner
5273b411b363SPhilipp Reisner /*
5274b411b363SPhilipp Reisner * return values:
5275b411b363SPhilipp Reisner * 1 yes, we have a valid connection
5276b411b363SPhilipp Reisner * 0 oops, did not work out, please try again
5277b411b363SPhilipp Reisner * -1 peer talks different language,
5278b411b363SPhilipp Reisner * no point in trying again, please go standalone.
5279b411b363SPhilipp Reisner */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our own features first, then expect the peer's in return.
	 * Network errors return 0 ("try again"), per the comment above. */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Byte-swap in place: from here on p holds host byte order. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* Very old peers sent only protocol_min; treat max == 0 as "min". */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* The version ranges must overlap, otherwise we cannot talk. */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	/* Only features both sides support are enabled. */
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
5345b411b363SPhilipp Reisner
5346b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac peer
 * authentication cannot work, so fail permanently (-1: do not retry). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* NOTE(review): "was build" should read "was built" in this
	 * log message — runtime string, left untouched here. */
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
5353b411b363SPhilipp Reisner #else
5354b411b363SPhilipp Reisner #define CHALLENGE_LEN 64
5355b10d96cbSJohannes Thoma
5356b10d96cbSJohannes Thoma /* Return value:
5357b10d96cbSJohannes Thoma 1 - auth succeeded,
5358b10d96cbSJohannes Thoma 0 - failed, try again (network error),
5359b10d96cbSJohannes Thoma -1 - auth failed, don't try again.
5360b10d96cbSJohannes Thoma */
5361b10d96cbSJohannes Thoma
/*
 * drbd_do_auth() - CRAM-HMAC challenge/response authentication with the peer
 *
 * Both sides send a random challenge, receive the peer's challenge, and
 * exchange HMACs keyed with the shared secret from net_conf.  We accept the
 * peer if its response equals the HMAC we compute over our own challenge.
 *
 * Return value:
 *   1 - auth succeeded,
 *   0 - failed, try again (network error),
 *  -1 - auth failed, don't try again.
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;		/* HMAC over the peer's challenge (sent out) */
	char *right_response = NULL;	/* HMAC over our challenge (expected back) */
	char *peers_ch = NULL;		/* challenge received from the peer */
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may be replaced
	 * concurrently, and the secret must not be used past the read section. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* shash_desc is a header plus tfm-dependent state; allocate both. */
	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}
	/* NOTE(review): 'secret' (stack copy of the shared secret) is not
	 * zeroized before return - consider memzero_explicit(); verify. */

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our challenge on the data socket. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Receive the peer's challenge header... */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;
		goto fail;
	}

	/* Bound the payload: at most twice, at least exactly our own
	 * challenge length.  Anything else is a hard protocol violation. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (!peers_ch) {
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A reflected challenge would let an attacker replay our own
	 * response back at us; reject it outright. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (!response) {
		rv = -1;
		goto fail;
	}

	/* Our response: HMAC over the full received challenge payload. */
	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* 'response' buffer is reused here to receive the peer's answer;
	 * its previous contents (our own response) are no longer needed. */
	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (!right_response) {
		rv = -1;
		goto fail;
	}

	/* What the peer *should* have sent: HMAC over our challenge. */
	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): plain memcmp() is not constant-time; a timing-safe
	 * compare (crypto_memneq) would be preferable here - verify. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so the partial-allocation paths are safe. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	if (desc) {
		/* Wipe the keyed hash state before freeing it. */
		shash_desc_zero(desc);
		kfree(desc);
	}

	return rv;
}
5537b411b363SPhilipp Reisner #endif
5538b411b363SPhilipp Reisner
drbd_receiver(struct drbd_thread * thi)55398fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi)
5540b411b363SPhilipp Reisner {
5541bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection;
5542b411b363SPhilipp Reisner int h;
5543b411b363SPhilipp Reisner
55441ec861ebSAndreas Gruenbacher drbd_info(connection, "receiver (re)started\n");
5545b411b363SPhilipp Reisner
5546b411b363SPhilipp Reisner do {
5547bde89a9eSAndreas Gruenbacher h = conn_connect(connection);
5548b411b363SPhilipp Reisner if (h == 0) {
5549bde89a9eSAndreas Gruenbacher conn_disconnect(connection);
555020ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ);
5551b411b363SPhilipp Reisner }
5552b411b363SPhilipp Reisner if (h == -1) {
55531ec861ebSAndreas Gruenbacher drbd_warn(connection, "Discarding network configuration.\n");
5554bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
5555b411b363SPhilipp Reisner }
5556b411b363SPhilipp Reisner } while (h == 0);
5557b411b363SPhilipp Reisner
5558c51a0ef3SLars Ellenberg if (h > 0) {
5559c51a0ef3SLars Ellenberg blk_start_plug(&connection->receiver_plug);
5560bde89a9eSAndreas Gruenbacher drbdd(connection);
5561c51a0ef3SLars Ellenberg blk_finish_plug(&connection->receiver_plug);
5562c51a0ef3SLars Ellenberg }
5563b411b363SPhilipp Reisner
5564bde89a9eSAndreas Gruenbacher conn_disconnect(connection);
5565b411b363SPhilipp Reisner
55661ec861ebSAndreas Gruenbacher drbd_info(connection, "receiver terminated\n");
5567b411b363SPhilipp Reisner return 0;
5568b411b363SPhilipp Reisner }
5569b411b363SPhilipp Reisner
5570b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */
5571b411b363SPhilipp Reisner
got_conn_RqSReply(struct drbd_connection * connection,struct packet_info * pi)5572bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5573b411b363SPhilipp Reisner {
5574e658983aSAndreas Gruenbacher struct p_req_state_reply *p = pi->data;
5575b411b363SPhilipp Reisner int retcode = be32_to_cpu(p->retcode);
5576b411b363SPhilipp Reisner
5577b411b363SPhilipp Reisner if (retcode >= SS_SUCCESS) {
5578bde89a9eSAndreas Gruenbacher set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
5579b411b363SPhilipp Reisner } else {
5580bde89a9eSAndreas Gruenbacher set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
55811ec861ebSAndreas Gruenbacher drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
5582fc3b10a4SPhilipp Reisner drbd_set_st_err_str(retcode), retcode);
5583fc3b10a4SPhilipp Reisner }
5584bde89a9eSAndreas Gruenbacher wake_up(&connection->ping_wait);
5585e4f78edeSPhilipp Reisner
55862735a594SAndreas Gruenbacher return 0;
5587fc3b10a4SPhilipp Reisner }
5588e4f78edeSPhilipp Reisner
got_RqSReply(struct drbd_connection * connection,struct packet_info * pi)5589bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
5590e4f78edeSPhilipp Reisner {
55919f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
5592b30ab791SAndreas Gruenbacher struct drbd_device *device;
5593e658983aSAndreas Gruenbacher struct p_req_state_reply *p = pi->data;
5594e4f78edeSPhilipp Reisner int retcode = be32_to_cpu(p->retcode);
5595e4f78edeSPhilipp Reisner
55969f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
55979f4fe9adSAndreas Gruenbacher if (!peer_device)
55982735a594SAndreas Gruenbacher return -EIO;
55999f4fe9adSAndreas Gruenbacher device = peer_device->device;
56001952e916SAndreas Gruenbacher
5601bde89a9eSAndreas Gruenbacher if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
56020b0ba1efSAndreas Gruenbacher D_ASSERT(device, connection->agreed_pro_version < 100);
5603bde89a9eSAndreas Gruenbacher return got_conn_RqSReply(connection, pi);
56044d0fc3fdSPhilipp Reisner }
56054d0fc3fdSPhilipp Reisner
5606e4f78edeSPhilipp Reisner if (retcode >= SS_SUCCESS) {
5607b30ab791SAndreas Gruenbacher set_bit(CL_ST_CHG_SUCCESS, &device->flags);
5608e4f78edeSPhilipp Reisner } else {
5609b30ab791SAndreas Gruenbacher set_bit(CL_ST_CHG_FAIL, &device->flags);
5610d0180171SAndreas Gruenbacher drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
5611b411b363SPhilipp Reisner drbd_set_st_err_str(retcode), retcode);
5612b411b363SPhilipp Reisner }
5613b30ab791SAndreas Gruenbacher wake_up(&device->state_wait);
5614b411b363SPhilipp Reisner
56152735a594SAndreas Gruenbacher return 0;
5616b411b363SPhilipp Reisner }
5617b411b363SPhilipp Reisner
/* Answer a peer keep-alive probe; the send result is the handler result. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	int err = drbd_send_ping_ack(connection);

	return err;
}
5623b411b363SPhilipp Reisner
got_PingAck(struct drbd_connection * connection,struct packet_info * pi)5624bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
5625b411b363SPhilipp Reisner {
5626b411b363SPhilipp Reisner /* restore idle timeout */
5627bde89a9eSAndreas Gruenbacher connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5628bde89a9eSAndreas Gruenbacher if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5629bde89a9eSAndreas Gruenbacher wake_up(&connection->ping_wait);
5630b411b363SPhilipp Reisner
56312735a594SAndreas Gruenbacher return 0;
5632b411b363SPhilipp Reisner }
5633b411b363SPhilipp Reisner
/*
 * got_IsInSync() - peer reports a resync block is already in sync
 *
 * Handler for P_RS_IS_IN_SYNC (checksum-based resync; the assert below
 * documents that this requires protocol version >= 89).  Marks the range
 * in sync, accounts it as a "same checksum" hit, and updates the resync
 * pacing counters.
 */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Only touch the bitmap/counters while holding a local-disk reference. */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(peer_device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(peer_device);
	/* blksize >> 9: bytes to 512-byte sectors for the throttle counter. */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5663b411b363SPhilipp Reisner
/*
 * validate_req_change_req_state() - look up a request by (id, sector) and
 * apply a request state transition to it.
 *
 * The lookup in @root and the __req_mod() transition happen under the
 * resource's req_lock so the request cannot be completed/freed in between.
 * If the request is not found (and @missing_ok did not make find_request()
 * tolerate that), -EIO is returned.  A master bio completion produced by
 * the transition is performed after dropping the lock.
 */
static int
validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, peer_device, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* Complete the upper-layer bio outside the spinlock. */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5686b411b363SPhilipp Reisner
got_BlockAck(struct drbd_connection * connection,struct packet_info * pi)5687bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
5688b411b363SPhilipp Reisner {
56899f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
5690b30ab791SAndreas Gruenbacher struct drbd_device *device;
5691e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data;
5692b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector);
5693b411b363SPhilipp Reisner int blksize = be32_to_cpu(p->blksize);
5694b411b363SPhilipp Reisner enum drbd_req_event what;
5695b411b363SPhilipp Reisner
56969f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
56979f4fe9adSAndreas Gruenbacher if (!peer_device)
56982735a594SAndreas Gruenbacher return -EIO;
56999f4fe9adSAndreas Gruenbacher device = peer_device->device;
57001952e916SAndreas Gruenbacher
570169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
5702b411b363SPhilipp Reisner
5703579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) {
57040d11f3cfSChristoph Böhmwalder drbd_set_in_sync(peer_device, sector, blksize);
57050d11f3cfSChristoph Böhmwalder dec_rs_pending(peer_device);
57062735a594SAndreas Gruenbacher return 0;
5707b411b363SPhilipp Reisner }
5708e05e1e59SAndreas Gruenbacher switch (pi->cmd) {
5709b411b363SPhilipp Reisner case P_RS_WRITE_ACK:
57108554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER_AND_SIS;
5711b411b363SPhilipp Reisner break;
5712b411b363SPhilipp Reisner case P_WRITE_ACK:
57138554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER;
5714b411b363SPhilipp Reisner break;
5715b411b363SPhilipp Reisner case P_RECV_ACK:
57168554df1cSAndreas Gruenbacher what = RECV_ACKED_BY_PEER;
5717b411b363SPhilipp Reisner break;
5718d4dabbe2SLars Ellenberg case P_SUPERSEDED:
5719d4dabbe2SLars Ellenberg what = CONFLICT_RESOLVED;
57207be8da07SAndreas Gruenbacher break;
57217be8da07SAndreas Gruenbacher case P_RETRY_WRITE:
57227be8da07SAndreas Gruenbacher what = POSTPONE_WRITE;
5723b411b363SPhilipp Reisner break;
5724b411b363SPhilipp Reisner default:
57252735a594SAndreas Gruenbacher BUG();
5726b411b363SPhilipp Reisner }
5727b411b363SPhilipp Reisner
5728ad878a0dSChristoph Böhmwalder return validate_req_change_req_state(peer_device, p->block_id, sector,
5729b30ab791SAndreas Gruenbacher &device->write_requests, __func__,
5730bc9c5c41SAndreas Gruenbacher what, false);
5731b411b363SPhilipp Reisner }
5732b411b363SPhilipp Reisner
/*
 * got_NegAck() - the peer failed to apply a write (or resync write)
 *
 * For resync requests (ID_SYNCER) the range is accounted as failed resync
 * I/O.  For application writes the request is NEG_ACKED; if the request is
 * already gone (legitimate, see comments below) the range is simply marked
 * out of sync so a later resync picks it up.
 */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(peer_device);
		drbd_rs_failed_io(peer_device, sector, size);
		return 0;
	}

	/* missing_ok=true: a vanished request is handled below, not an error. */
	err = validate_req_change_req_state(peer_device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(peer_device, sector, size);
	}
	return 0;
}
5768b411b363SPhilipp Reisner
got_NegDReply(struct drbd_connection * connection,struct packet_info * pi)5769bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
5770b411b363SPhilipp Reisner {
57719f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device;
5772b30ab791SAndreas Gruenbacher struct drbd_device *device;
5773e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data;
5774b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector);
5775b411b363SPhilipp Reisner
57769f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr);
57779f4fe9adSAndreas Gruenbacher if (!peer_device)
57782735a594SAndreas Gruenbacher return -EIO;
57799f4fe9adSAndreas Gruenbacher device = peer_device->device;
57801952e916SAndreas Gruenbacher
578169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
57827be8da07SAndreas Gruenbacher
5783d0180171SAndreas Gruenbacher drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
5784b411b363SPhilipp Reisner (unsigned long long)sector, be32_to_cpu(p->blksize));
5785b411b363SPhilipp Reisner
5786ad878a0dSChristoph Böhmwalder return validate_req_change_req_state(peer_device, p->block_id, sector,
5787b30ab791SAndreas Gruenbacher &device->read_requests, __func__,
57888554df1cSAndreas Gruenbacher NEG_ACKED, false);
5789b411b363SPhilipp Reisner }
5790b411b363SPhilipp Reisner
/*
 * got_NegRSDReply() - a resync data request was denied or cancelled
 *
 * Shared handler for P_NEG_RS_DREPLY (peer failed the resync read: account
 * the range as failed resync I/O) and P_RS_CANCEL (peer cancelled: only the
 * in-flight bookkeeping is rolled back).  Local-disk accounting is skipped
 * unless we can still grab a disk reference in at least D_FAILED state.
 */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(peer_device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(peer_device, sector, size);
			break;
		case P_RS_CANCEL:
			/* nothing to account; the resync block was cancelled */
			break;
		default:
			/* dispatch table guarantees only the two cmds above */
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5827b411b363SPhilipp Reisner
/*
 * got_BarrierAck() - the peer confirmed a write barrier
 *
 * Releases the transfer-log epoch identified by the barrier number, then
 * walks all peer devices (under RCU) and, for any volume that is in
 * Ahead mode with no application I/O in flight, arms the one-shot timer
 * that switches it back to being a sync source.
 */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* test_and_set_bit ensures the timer is armed only once. */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5851b411b363SPhilipp Reisner
/*
 * got_OVResult() - result of one online-verify block from the peer
 *
 * Records whether the block matched (ID_OUT_OF_SYNC flags a mismatch),
 * decrements the outstanding-verify bookkeeping, and when the last block
 * has been answered queues w_ov_finished on the sender work queue to wrap
 * up the verify run.
 */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(peer_device, sector, size);
	else
		ov_out_of_sync_print(peer_device);

	/* Without a local disk there is nothing left to account. */
	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(peer_device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	/* NOTE(review): 0x200 suggests ov_left is counted in units where
	 * 0x200 corresponds to one megabyte step - confirm against setup. */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(peer_device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* No memory for the work item: finish synchronously. */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(peer_device);
			drbd_resync_finished(peer_device);
		}
	}
	put_ldev(device);
	return 0;
}
5903b411b363SPhilipp Reisner
/* Intentional no-op handler: the packet was received and is ignored. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
59080ced55a3SPhilipp Reisner
/* Dispatch table entry for packets arriving on the meta socket. */
struct meta_sock_cmd {
	size_t pkt_size;	/* expected payload size of the packet */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);	/* handler */
};
5913b411b363SPhilipp Reisner
/*
 * set_rcvtimeo() - program the meta socket's receive timeout
 *
 * @ping_timeout true selects the (tenths-of-a-second) ping timeout, false
 * the idle ping interval; both come from the current net_conf under RCU.
 */
static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
	struct net_conf *nc;
	long timeout;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (ping_timeout)
		timeout = (long)nc->ping_timeo * HZ / 10;
	else
		timeout = (long)nc->ping_int * HZ;
	rcu_read_unlock();

	connection->meta.socket->sk->sk_rcvtimeo = timeout;
}
5930668700b4SPhilipp Reisner
set_ping_timeout(struct drbd_connection * connection)5931668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection)
5932668700b4SPhilipp Reisner {
5933668700b4SPhilipp Reisner set_rcvtimeo(connection, 1);
5934668700b4SPhilipp Reisner }
5935668700b4SPhilipp Reisner
set_idle_timeout(struct drbd_connection * connection)5936668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection)
5937668700b4SPhilipp Reisner {
5938668700b4SPhilipp Reisner set_rcvtimeo(connection, 0);
5939668700b4SPhilipp Reisner }
5940668700b4SPhilipp Reisner
/* Packet dispatch table for the ack receiver, indexed by packet type.
 * pkt_size is the payload size expected after the header; packet types
 * without an entry (NULL ->fn) are rejected as unexpected meta packets
 * by drbd_ack_receiver(). */
static struct meta_sock_cmd ack_receiver_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
5960b411b363SPhilipp Reisner
drbd_ack_receiver(struct drbd_thread * thi)59611c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi)
5962b411b363SPhilipp Reisner {
5963bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection;
5964668700b4SPhilipp Reisner struct meta_sock_cmd *cmd = NULL;
596577351055SPhilipp Reisner struct packet_info pi;
5966668700b4SPhilipp Reisner unsigned long pre_recv_jif;
5967257d0af6SPhilipp Reisner int rv;
5968bde89a9eSAndreas Gruenbacher void *buf = connection->meta.rbuf;
5969b411b363SPhilipp Reisner int received = 0;
5970bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection);
597152b061a4SAndreas Gruenbacher int expect = header_size;
597244ed167dSPhilipp Reisner bool ping_timeout_active = false;
5973b411b363SPhilipp Reisner
59748b700983SPeter Zijlstra sched_set_fifo_low(current);
5975b411b363SPhilipp Reisner
5976e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) {
597780822284SPhilipp Reisner drbd_thread_current_set_cpu(thi);
597844ed167dSPhilipp Reisner
5979668700b4SPhilipp Reisner conn_reclaim_net_peer_reqs(connection);
598044ed167dSPhilipp Reisner
5981bde89a9eSAndreas Gruenbacher if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5982bde89a9eSAndreas Gruenbacher if (drbd_send_ping(connection)) {
59831ec861ebSAndreas Gruenbacher drbd_err(connection, "drbd_send_ping has failed\n");
5984841ce241SAndreas Gruenbacher goto reconnect;
5985841ce241SAndreas Gruenbacher }
5986668700b4SPhilipp Reisner set_ping_timeout(connection);
598744ed167dSPhilipp Reisner ping_timeout_active = true;
5988b411b363SPhilipp Reisner }
5989b411b363SPhilipp Reisner
5990668700b4SPhilipp Reisner pre_recv_jif = jiffies;
5991bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5992b411b363SPhilipp Reisner
5993b411b363SPhilipp Reisner /* Note:
5994b411b363SPhilipp Reisner * -EINTR (on meta) we got a signal
5995b411b363SPhilipp Reisner * -EAGAIN (on meta) rcvtimeo expired
5996b411b363SPhilipp Reisner * -ECONNRESET other side closed the connection
5997b411b363SPhilipp Reisner * -ERESTARTSYS (on data) we got a signal
5998b411b363SPhilipp Reisner * rv < 0 other than above: unexpected error!
5999b411b363SPhilipp Reisner * rv == expected: full header or command
6000b411b363SPhilipp Reisner * rv < expected: "woken" by signal during receive
6001b411b363SPhilipp Reisner * rv == 0 : "connection shut down by peer"
6002b411b363SPhilipp Reisner */
6003b411b363SPhilipp Reisner if (likely(rv > 0)) {
6004b411b363SPhilipp Reisner received += rv;
6005b411b363SPhilipp Reisner buf += rv;
6006b411b363SPhilipp Reisner } else if (rv == 0) {
6007bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) {
6008b66623e3SPhilipp Reisner long t;
6009b66623e3SPhilipp Reisner rcu_read_lock();
6010bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
6011b66623e3SPhilipp Reisner rcu_read_unlock();
6012b66623e3SPhilipp Reisner
6013bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait,
6014bde89a9eSAndreas Gruenbacher connection->cstate < C_WF_REPORT_PARAMS,
6015b66623e3SPhilipp Reisner t);
6016599377acSPhilipp Reisner if (t)
6017599377acSPhilipp Reisner break;
6018599377acSPhilipp Reisner }
60191ec861ebSAndreas Gruenbacher drbd_err(connection, "meta connection shut down by peer.\n");
6020b411b363SPhilipp Reisner goto reconnect;
6021b411b363SPhilipp Reisner } else if (rv == -EAGAIN) {
6022cb6518cbSLars Ellenberg /* If the data socket received something meanwhile,
6023cb6518cbSLars Ellenberg * that is good enough: peer is still alive. */
6024668700b4SPhilipp Reisner if (time_after(connection->last_received, pre_recv_jif))
6025cb6518cbSLars Ellenberg continue;
6026f36af18cSLars Ellenberg if (ping_timeout_active) {
60271ec861ebSAndreas Gruenbacher drbd_err(connection, "PingAck did not arrive in time.\n");
6028b411b363SPhilipp Reisner goto reconnect;
6029b411b363SPhilipp Reisner }
6030bde89a9eSAndreas Gruenbacher set_bit(SEND_PING, &connection->flags);
6031b411b363SPhilipp Reisner continue;
6032b411b363SPhilipp Reisner } else if (rv == -EINTR) {
6033668700b4SPhilipp Reisner /* maybe drbd_thread_stop(): the while condition will notice.
6034668700b4SPhilipp Reisner * maybe woken for send_ping: we'll send a ping above,
6035668700b4SPhilipp Reisner * and change the rcvtimeo */
6036668700b4SPhilipp Reisner flush_signals(current);
6037b411b363SPhilipp Reisner continue;
6038b411b363SPhilipp Reisner } else {
60391ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv);
6040b411b363SPhilipp Reisner goto reconnect;
6041b411b363SPhilipp Reisner }
6042b411b363SPhilipp Reisner
6043b411b363SPhilipp Reisner if (received == expect && cmd == NULL) {
6044bde89a9eSAndreas Gruenbacher if (decode_header(connection, connection->meta.rbuf, &pi))
6045b411b363SPhilipp Reisner goto reconnect;
6046668700b4SPhilipp Reisner cmd = &ack_receiver_tbl[pi.cmd];
6047668700b4SPhilipp Reisner if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
60481ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
60492fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd);
6050b411b363SPhilipp Reisner goto disconnect;
6051b411b363SPhilipp Reisner }
6052e658983aSAndreas Gruenbacher expect = header_size + cmd->pkt_size;
605352b061a4SAndreas Gruenbacher if (pi.size != expect - header_size) {
60541ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
605577351055SPhilipp Reisner pi.cmd, pi.size);
6056b411b363SPhilipp Reisner goto reconnect;
6057b411b363SPhilipp Reisner }
6058257d0af6SPhilipp Reisner }
6059b411b363SPhilipp Reisner if (received == expect) {
60602735a594SAndreas Gruenbacher bool err;
6061a4fbda8eSPhilipp Reisner
6062bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi);
60632735a594SAndreas Gruenbacher if (err) {
6064d75f773cSSakari Ailus drbd_err(connection, "%ps failed\n", cmd->fn);
6065b411b363SPhilipp Reisner goto reconnect;
60661952e916SAndreas Gruenbacher }
6067b411b363SPhilipp Reisner
6068bde89a9eSAndreas Gruenbacher connection->last_received = jiffies;
6069f36af18cSLars Ellenberg
6070668700b4SPhilipp Reisner if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
6071668700b4SPhilipp Reisner set_idle_timeout(connection);
607244ed167dSPhilipp Reisner ping_timeout_active = false;
607344ed167dSPhilipp Reisner }
6074b411b363SPhilipp Reisner
6075bde89a9eSAndreas Gruenbacher buf = connection->meta.rbuf;
6076b411b363SPhilipp Reisner received = 0;
607752b061a4SAndreas Gruenbacher expect = header_size;
6078b411b363SPhilipp Reisner cmd = NULL;
6079b411b363SPhilipp Reisner }
6080b411b363SPhilipp Reisner }
6081b411b363SPhilipp Reisner
6082b411b363SPhilipp Reisner if (0) {
6083b411b363SPhilipp Reisner reconnect:
6084bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6085bde89a9eSAndreas Gruenbacher conn_md_sync(connection);
6086b411b363SPhilipp Reisner }
6087b411b363SPhilipp Reisner if (0) {
6088b411b363SPhilipp Reisner disconnect:
6089bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
6090b411b363SPhilipp Reisner }
6091b411b363SPhilipp Reisner
6092668700b4SPhilipp Reisner drbd_info(connection, "ack_receiver terminated\n");
6093b411b363SPhilipp Reisner
6094b411b363SPhilipp Reisner return 0;
6095b411b363SPhilipp Reisner }
6096668700b4SPhilipp Reisner
drbd_send_acks_wf(struct work_struct * ws)6097668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws)
6098668700b4SPhilipp Reisner {
6099668700b4SPhilipp Reisner struct drbd_peer_device *peer_device =
6100668700b4SPhilipp Reisner container_of(ws, struct drbd_peer_device, send_acks_work);
6101668700b4SPhilipp Reisner struct drbd_connection *connection = peer_device->connection;
6102668700b4SPhilipp Reisner struct drbd_device *device = peer_device->device;
6103668700b4SPhilipp Reisner struct net_conf *nc;
6104668700b4SPhilipp Reisner int tcp_cork, err;
6105668700b4SPhilipp Reisner
6106668700b4SPhilipp Reisner rcu_read_lock();
6107668700b4SPhilipp Reisner nc = rcu_dereference(connection->net_conf);
6108668700b4SPhilipp Reisner tcp_cork = nc->tcp_cork;
6109668700b4SPhilipp Reisner rcu_read_unlock();
6110668700b4SPhilipp Reisner
6111668700b4SPhilipp Reisner if (tcp_cork)
6112db10538aSChristoph Hellwig tcp_sock_set_cork(connection->meta.socket->sk, true);
6113668700b4SPhilipp Reisner
6114668700b4SPhilipp Reisner err = drbd_finish_peer_reqs(device);
6115668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device);
6116668700b4SPhilipp Reisner /* get is in drbd_endio_write_sec_final(). That is necessary to keep the
6117668700b4SPhilipp Reisner struct work_struct send_acks_work alive, which is in the peer_device object */
6118668700b4SPhilipp Reisner
6119668700b4SPhilipp Reisner if (err) {
6120668700b4SPhilipp Reisner conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6121668700b4SPhilipp Reisner return;
6122668700b4SPhilipp Reisner }
6123668700b4SPhilipp Reisner
6124668700b4SPhilipp Reisner if (tcp_cork)
6125db10538aSChristoph Hellwig tcp_sock_set_cork(connection->meta.socket->sk, false);
6126668700b4SPhilipp Reisner
6127668700b4SPhilipp Reisner return;
6128668700b4SPhilipp Reisner }
6129