1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_receiver.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner */ 24b411b363SPhilipp Reisner 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner 28b411b363SPhilipp Reisner #include <asm/uaccess.h> 29b411b363SPhilipp Reisner #include <net/sock.h> 30b411b363SPhilipp Reisner 31b411b363SPhilipp Reisner #include <linux/drbd.h> 32b411b363SPhilipp Reisner #include <linux/fs.h> 33b411b363SPhilipp Reisner #include <linux/file.h> 34b411b363SPhilipp Reisner #include <linux/in.h> 35b411b363SPhilipp Reisner #include <linux/mm.h> 36b411b363SPhilipp Reisner #include <linux/memcontrol.h> 37b411b363SPhilipp Reisner #include <linux/mm_inline.h> 38b411b363SPhilipp Reisner #include <linux/slab.h> 39b411b363SPhilipp Reisner #include <linux/pkt_sched.h> 40b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__ 41b411b363SPhilipp Reisner #include <linux/unistd.h> 42b411b363SPhilipp Reisner #include <linux/vmalloc.h> 43b411b363SPhilipp Reisner #include <linux/random.h> 44b411b363SPhilipp Reisner #include <linux/string.h> 45b411b363SPhilipp Reisner #include <linux/scatterlist.h> 46b411b363SPhilipp Reisner #include "drbd_int.h" 47a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 48b411b363SPhilipp Reisner #include "drbd_req.h" 49b411b363SPhilipp Reisner #include "drbd_vli.h" 50b411b363SPhilipp Reisner 5120c68fdeSLars Ellenberg #define PRO_FEATURES (FF_TRIM) 5220c68fdeSLars Ellenberg 5377351055SPhilipp Reisner struct packet_info { 5477351055SPhilipp Reisner enum drbd_packet cmd; 55e2857216SAndreas Gruenbacher unsigned int size; 56e2857216SAndreas Gruenbacher unsigned int vnr; 57e658983aSAndreas Gruenbacher void *data; 5877351055SPhilipp Reisner }; 5977351055SPhilipp Reisner 60b411b363SPhilipp Reisner enum finish_epoch { 61b411b363SPhilipp Reisner FE_STILL_LIVE, 62b411b363SPhilipp Reisner FE_DESTROYED, 63b411b363SPhilipp Reisner FE_RECYCLED, 64b411b363SPhilipp Reisner }; 65b411b363SPhilipp Reisner 66bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct 
drbd_connection *connection); 67bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection); 6869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *); 69a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection); 70bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); 7199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int); 72b411b363SPhilipp Reisner 73b411b363SPhilipp Reisner 74b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 75b411b363SPhilipp Reisner 7645bb912bSLars Ellenberg /* 7745bb912bSLars Ellenberg * some helper functions to deal with single linked page lists, 7845bb912bSLars Ellenberg * page->private being our "next" pointer. 7945bb912bSLars Ellenberg */ 8045bb912bSLars Ellenberg 8145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off. 8245bb912bSLars Ellenberg * Otherwise, don't modify head, and return NULL. 8345bb912bSLars Ellenberg * Locking is the responsibility of the caller. 
8445bb912bSLars Ellenberg */ 8545bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n) 8645bb912bSLars Ellenberg { 8745bb912bSLars Ellenberg struct page *page; 8845bb912bSLars Ellenberg struct page *tmp; 8945bb912bSLars Ellenberg 9045bb912bSLars Ellenberg BUG_ON(!n); 9145bb912bSLars Ellenberg BUG_ON(!head); 9245bb912bSLars Ellenberg 9345bb912bSLars Ellenberg page = *head; 9423ce4227SPhilipp Reisner 9523ce4227SPhilipp Reisner if (!page) 9623ce4227SPhilipp Reisner return NULL; 9723ce4227SPhilipp Reisner 9845bb912bSLars Ellenberg while (page) { 9945bb912bSLars Ellenberg tmp = page_chain_next(page); 10045bb912bSLars Ellenberg if (--n == 0) 10145bb912bSLars Ellenberg break; /* found sufficient pages */ 10245bb912bSLars Ellenberg if (tmp == NULL) 10345bb912bSLars Ellenberg /* insufficient pages, don't use any of them. */ 10445bb912bSLars Ellenberg return NULL; 10545bb912bSLars Ellenberg page = tmp; 10645bb912bSLars Ellenberg } 10745bb912bSLars Ellenberg 10845bb912bSLars Ellenberg /* add end of list marker for the returned list */ 10945bb912bSLars Ellenberg set_page_private(page, 0); 11045bb912bSLars Ellenberg /* actual return value, and adjustment of head */ 11145bb912bSLars Ellenberg page = *head; 11245bb912bSLars Ellenberg *head = tmp; 11345bb912bSLars Ellenberg return page; 11445bb912bSLars Ellenberg } 11545bb912bSLars Ellenberg 11645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short) 11745bb912bSLars Ellenberg * "private" page chain, before adding it back to a global chain head 11845bb912bSLars Ellenberg * with page_chain_add() under a spinlock. 
*/ 11945bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len) 12045bb912bSLars Ellenberg { 12145bb912bSLars Ellenberg struct page *tmp; 12245bb912bSLars Ellenberg int i = 1; 12345bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) 12445bb912bSLars Ellenberg ++i, page = tmp; 12545bb912bSLars Ellenberg if (len) 12645bb912bSLars Ellenberg *len = i; 12745bb912bSLars Ellenberg return page; 12845bb912bSLars Ellenberg } 12945bb912bSLars Ellenberg 13045bb912bSLars Ellenberg static int page_chain_free(struct page *page) 13145bb912bSLars Ellenberg { 13245bb912bSLars Ellenberg struct page *tmp; 13345bb912bSLars Ellenberg int i = 0; 13445bb912bSLars Ellenberg page_chain_for_each_safe(page, tmp) { 13545bb912bSLars Ellenberg put_page(page); 13645bb912bSLars Ellenberg ++i; 13745bb912bSLars Ellenberg } 13845bb912bSLars Ellenberg return i; 13945bb912bSLars Ellenberg } 14045bb912bSLars Ellenberg 14145bb912bSLars Ellenberg static void page_chain_add(struct page **head, 14245bb912bSLars Ellenberg struct page *chain_first, struct page *chain_last) 14345bb912bSLars Ellenberg { 14445bb912bSLars Ellenberg #if 1 14545bb912bSLars Ellenberg struct page *tmp; 14645bb912bSLars Ellenberg tmp = page_chain_tail(chain_first, NULL); 14745bb912bSLars Ellenberg BUG_ON(tmp != chain_last); 14845bb912bSLars Ellenberg #endif 14945bb912bSLars Ellenberg 15045bb912bSLars Ellenberg /* add chain to head */ 15145bb912bSLars Ellenberg set_page_private(chain_last, (unsigned long)*head); 15245bb912bSLars Ellenberg *head = chain_first; 15345bb912bSLars Ellenberg } 15445bb912bSLars Ellenberg 155b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device, 15618c2d522SAndreas Gruenbacher unsigned int number) 157b411b363SPhilipp Reisner { 158b411b363SPhilipp Reisner struct page *page = NULL; 15945bb912bSLars Ellenberg struct page *tmp = NULL; 16018c2d522SAndreas Gruenbacher unsigned int i = 0; 161b411b363SPhilipp Reisner 162b411b363SPhilipp 
Reisner /* Yes, testing drbd_pp_vacant outside the lock is racy. 163b411b363SPhilipp Reisner * So what. It saves a spin_lock. */ 16445bb912bSLars Ellenberg if (drbd_pp_vacant >= number) { 165b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 16645bb912bSLars Ellenberg page = page_chain_del(&drbd_pp_pool, number); 16745bb912bSLars Ellenberg if (page) 16845bb912bSLars Ellenberg drbd_pp_vacant -= number; 169b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 17045bb912bSLars Ellenberg if (page) 17145bb912bSLars Ellenberg return page; 172b411b363SPhilipp Reisner } 17345bb912bSLars Ellenberg 174b411b363SPhilipp Reisner /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD 175b411b363SPhilipp Reisner * "criss-cross" setup, that might cause write-out on some other DRBD, 176b411b363SPhilipp Reisner * which in turn might block on the other node at this very place. */ 17745bb912bSLars Ellenberg for (i = 0; i < number; i++) { 17845bb912bSLars Ellenberg tmp = alloc_page(GFP_TRY); 17945bb912bSLars Ellenberg if (!tmp) 18045bb912bSLars Ellenberg break; 18145bb912bSLars Ellenberg set_page_private(tmp, (unsigned long)page); 18245bb912bSLars Ellenberg page = tmp; 18345bb912bSLars Ellenberg } 18445bb912bSLars Ellenberg 18545bb912bSLars Ellenberg if (i == number) 186b411b363SPhilipp Reisner return page; 18745bb912bSLars Ellenberg 18845bb912bSLars Ellenberg /* Not enough pages immediately available this time. 189c37c8ecfSAndreas Gruenbacher * No need to jump around here, drbd_alloc_pages will retry this 19045bb912bSLars Ellenberg * function "soon". 
*/ 19145bb912bSLars Ellenberg if (page) { 19245bb912bSLars Ellenberg tmp = page_chain_tail(page, NULL); 19345bb912bSLars Ellenberg spin_lock(&drbd_pp_lock); 19445bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 19545bb912bSLars Ellenberg drbd_pp_vacant += i; 19645bb912bSLars Ellenberg spin_unlock(&drbd_pp_lock); 19745bb912bSLars Ellenberg } 19845bb912bSLars Ellenberg return NULL; 199b411b363SPhilipp Reisner } 200b411b363SPhilipp Reisner 201b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device, 202a990be46SAndreas Gruenbacher struct list_head *to_be_freed) 203b411b363SPhilipp Reisner { 204a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req, *tmp; 205b411b363SPhilipp Reisner 206b411b363SPhilipp Reisner /* The EEs are always appended to the end of the list. Since 207b411b363SPhilipp Reisner they are sent in order over the wire, they have to finish 208b411b363SPhilipp Reisner in order. As soon as we see the first not finished we can 209b411b363SPhilipp Reisner stop to examine the list... 
*/ 210b411b363SPhilipp Reisner 211a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { 212045417f7SAndreas Gruenbacher if (drbd_peer_req_has_active_page(peer_req)) 213b411b363SPhilipp Reisner break; 214a8cd15baSAndreas Gruenbacher list_move(&peer_req->w.list, to_be_freed); 215b411b363SPhilipp Reisner } 216b411b363SPhilipp Reisner } 217b411b363SPhilipp Reisner 218b30ab791SAndreas Gruenbacher static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device) 219b411b363SPhilipp Reisner { 220b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 221db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 222b411b363SPhilipp Reisner 2230500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 224b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 2250500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 226b411b363SPhilipp Reisner 227a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 228b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 229b411b363SPhilipp Reisner } 230b411b363SPhilipp Reisner 231b411b363SPhilipp Reisner /** 232c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) 233b30ab791SAndreas Gruenbacher * @device: DRBD device. 23445bb912bSLars Ellenberg * @number: number of pages requested 23545bb912bSLars Ellenberg * @retry: whether to retry, if not enough pages are available right now 236b411b363SPhilipp Reisner * 23745bb912bSLars Ellenberg * Tries to allocate number pages, first from our own page pool, then from 2380e49d7b0SLars Ellenberg * the kernel. 23945bb912bSLars Ellenberg * Possibly retry until DRBD frees sufficient pages somewhere else. 
24045bb912bSLars Ellenberg * 2410e49d7b0SLars Ellenberg * If this allocation would exceed the max_buffers setting, we throttle 2420e49d7b0SLars Ellenberg * allocation (schedule_timeout) to give the system some room to breathe. 2430e49d7b0SLars Ellenberg * 2440e49d7b0SLars Ellenberg * We do not use max-buffers as hard limit, because it could lead to 2450e49d7b0SLars Ellenberg * congestion and further to a distributed deadlock during online-verify or 2460e49d7b0SLars Ellenberg * (checksum based) resync, if the max-buffers, socket buffer sizes and 2470e49d7b0SLars Ellenberg * resync-rate settings are mis-configured. 2480e49d7b0SLars Ellenberg * 24945bb912bSLars Ellenberg * Returns a page chain linked via page->private. 250b411b363SPhilipp Reisner */ 25169a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, 252c37c8ecfSAndreas Gruenbacher bool retry) 253b411b363SPhilipp Reisner { 25469a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 255b411b363SPhilipp Reisner struct page *page = NULL; 25644ed167dSPhilipp Reisner struct net_conf *nc; 257b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2580e49d7b0SLars Ellenberg unsigned int mxb; 259b411b363SPhilipp Reisner 26044ed167dSPhilipp Reisner rcu_read_lock(); 26169a22773SAndreas Gruenbacher nc = rcu_dereference(peer_device->connection->net_conf); 26244ed167dSPhilipp Reisner mxb = nc ? 
nc->max_buffers : 1000000; 26344ed167dSPhilipp Reisner rcu_read_unlock(); 26444ed167dSPhilipp Reisner 265b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) 266b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 267b411b363SPhilipp Reisner 26845bb912bSLars Ellenberg while (page == NULL) { 269b411b363SPhilipp Reisner prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 270b411b363SPhilipp Reisner 271b30ab791SAndreas Gruenbacher drbd_kick_lo_and_reclaim_net(device); 272b411b363SPhilipp Reisner 273b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) { 274b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 275b411b363SPhilipp Reisner if (page) 276b411b363SPhilipp Reisner break; 277b411b363SPhilipp Reisner } 278b411b363SPhilipp Reisner 279b411b363SPhilipp Reisner if (!retry) 280b411b363SPhilipp Reisner break; 281b411b363SPhilipp Reisner 282b411b363SPhilipp Reisner if (signal_pending(current)) { 283d0180171SAndreas Gruenbacher drbd_warn(device, "drbd_alloc_pages interrupted!\n"); 284b411b363SPhilipp Reisner break; 285b411b363SPhilipp Reisner } 286b411b363SPhilipp Reisner 2870e49d7b0SLars Ellenberg if (schedule_timeout(HZ/10) == 0) 2880e49d7b0SLars Ellenberg mxb = UINT_MAX; 289b411b363SPhilipp Reisner } 290b411b363SPhilipp Reisner finish_wait(&drbd_pp_wait, &wait); 291b411b363SPhilipp Reisner 29245bb912bSLars Ellenberg if (page) 293b30ab791SAndreas Gruenbacher atomic_add(number, &device->pp_in_use); 294b411b363SPhilipp Reisner return page; 295b411b363SPhilipp Reisner } 296b411b363SPhilipp Reisner 297c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. 2980500813fSAndreas Gruenbacher * Is also used from inside an other spin_lock_irq(&resource->req_lock); 29945bb912bSLars Ellenberg * Either links the page chain back to the global pool, 30045bb912bSLars Ellenberg * or returns all pages to the system. 
*/ 301b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) 302b411b363SPhilipp Reisner { 303b30ab791SAndreas Gruenbacher atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; 304b411b363SPhilipp Reisner int i; 305435f0740SLars Ellenberg 306a73ff323SLars Ellenberg if (page == NULL) 307a73ff323SLars Ellenberg return; 308a73ff323SLars Ellenberg 3091816a2b4SLars Ellenberg if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) 31045bb912bSLars Ellenberg i = page_chain_free(page); 31145bb912bSLars Ellenberg else { 31245bb912bSLars Ellenberg struct page *tmp; 31345bb912bSLars Ellenberg tmp = page_chain_tail(page, &i); 314b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 31545bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 31645bb912bSLars Ellenberg drbd_pp_vacant += i; 317b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 318b411b363SPhilipp Reisner } 319435f0740SLars Ellenberg i = atomic_sub_return(i, a); 32045bb912bSLars Ellenberg if (i < 0) 321d0180171SAndreas Gruenbacher drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", 322435f0740SLars Ellenberg is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); 323b411b363SPhilipp Reisner wake_up(&drbd_pp_wait); 324b411b363SPhilipp Reisner } 325b411b363SPhilipp Reisner 326b411b363SPhilipp Reisner /* 327b411b363SPhilipp Reisner You need to hold the req_lock: 328b411b363SPhilipp Reisner _drbd_wait_ee_list_empty() 329b411b363SPhilipp Reisner 330b411b363SPhilipp Reisner You must not have the req_lock: 3313967deb1SAndreas Gruenbacher drbd_free_peer_req() 3320db55363SAndreas Gruenbacher drbd_alloc_peer_req() 3337721f567SAndreas Gruenbacher drbd_free_peer_reqs() 334b411b363SPhilipp Reisner drbd_ee_fix_bhs() 335a990be46SAndreas Gruenbacher drbd_finish_peer_reqs() 336b411b363SPhilipp Reisner drbd_clear_done_ee() 337b411b363SPhilipp Reisner drbd_wait_ee_list_empty() 338b411b363SPhilipp Reisner */ 339b411b363SPhilipp Reisner 340f6ffca9fSAndreas Gruenbacher struct drbd_peer_request * 34169a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 342a0fb3c47SLars Ellenberg unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local) 343b411b363SPhilipp Reisner { 34469a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 345db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 346a73ff323SLars Ellenberg struct page *page = NULL; 34745bb912bSLars Ellenberg unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 348b411b363SPhilipp Reisner 349b30ab791SAndreas Gruenbacher if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) 350b411b363SPhilipp Reisner return NULL; 351b411b363SPhilipp Reisner 352db830c46SAndreas Gruenbacher peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 353db830c46SAndreas Gruenbacher if (!peer_req) { 354b411b363SPhilipp Reisner if (!(gfp_mask & __GFP_NOWARN)) 355d0180171SAndreas Gruenbacher drbd_err(device, "%s: allocation failed\n", __func__); 356b411b363SPhilipp Reisner return NULL; 357b411b363SPhilipp Reisner } 358b411b363SPhilipp Reisner 359a0fb3c47SLars 
Ellenberg if (has_payload && data_size) { 36069a22773SAndreas Gruenbacher page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); 36145bb912bSLars Ellenberg if (!page) 36245bb912bSLars Ellenberg goto fail; 363a73ff323SLars Ellenberg } 364b411b363SPhilipp Reisner 365db830c46SAndreas Gruenbacher drbd_clear_interval(&peer_req->i); 366db830c46SAndreas Gruenbacher peer_req->i.size = data_size; 367db830c46SAndreas Gruenbacher peer_req->i.sector = sector; 368db830c46SAndreas Gruenbacher peer_req->i.local = false; 369db830c46SAndreas Gruenbacher peer_req->i.waiting = false; 370b411b363SPhilipp Reisner 371db830c46SAndreas Gruenbacher peer_req->epoch = NULL; 372a8cd15baSAndreas Gruenbacher peer_req->peer_device = peer_device; 373db830c46SAndreas Gruenbacher peer_req->pages = page; 374db830c46SAndreas Gruenbacher atomic_set(&peer_req->pending_bios, 0); 375db830c46SAndreas Gruenbacher peer_req->flags = 0; 3769a8e7753SAndreas Gruenbacher /* 3779a8e7753SAndreas Gruenbacher * The block_id is opaque to the receiver. It is not endianness 3789a8e7753SAndreas Gruenbacher * converted, and sent back to the sender unchanged. 
3799a8e7753SAndreas Gruenbacher */ 380db830c46SAndreas Gruenbacher peer_req->block_id = id; 381b411b363SPhilipp Reisner 382db830c46SAndreas Gruenbacher return peer_req; 383b411b363SPhilipp Reisner 38445bb912bSLars Ellenberg fail: 385db830c46SAndreas Gruenbacher mempool_free(peer_req, drbd_ee_mempool); 386b411b363SPhilipp Reisner return NULL; 387b411b363SPhilipp Reisner } 388b411b363SPhilipp Reisner 389b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 390f6ffca9fSAndreas Gruenbacher int is_net) 391b411b363SPhilipp Reisner { 392db830c46SAndreas Gruenbacher if (peer_req->flags & EE_HAS_DIGEST) 393db830c46SAndreas Gruenbacher kfree(peer_req->digest); 394b30ab791SAndreas Gruenbacher drbd_free_pages(device, peer_req->pages, is_net); 3950b0ba1efSAndreas Gruenbacher D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 3960b0ba1efSAndreas Gruenbacher D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 397db830c46SAndreas Gruenbacher mempool_free(peer_req, drbd_ee_mempool); 398b411b363SPhilipp Reisner } 399b411b363SPhilipp Reisner 400b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) 401b411b363SPhilipp Reisner { 402b411b363SPhilipp Reisner LIST_HEAD(work_list); 403db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 404b411b363SPhilipp Reisner int count = 0; 405b30ab791SAndreas Gruenbacher int is_net = list == &device->net_ee; 406b411b363SPhilipp Reisner 4070500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 408b411b363SPhilipp Reisner list_splice_init(list, &work_list); 4090500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 410b411b363SPhilipp Reisner 411a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 412b30ab791SAndreas Gruenbacher __drbd_free_peer_req(device, peer_req, is_net); 413b411b363SPhilipp Reisner count++; 414b411b363SPhilipp Reisner 
} 415b411b363SPhilipp Reisner return count; 416b411b363SPhilipp Reisner } 417b411b363SPhilipp Reisner 418b411b363SPhilipp Reisner /* 419a990be46SAndreas Gruenbacher * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 420b411b363SPhilipp Reisner */ 421b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device) 422b411b363SPhilipp Reisner { 423b411b363SPhilipp Reisner LIST_HEAD(work_list); 424b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 425db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 426e2b3032bSAndreas Gruenbacher int err = 0; 427b411b363SPhilipp Reisner 4280500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 429b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 430b30ab791SAndreas Gruenbacher list_splice_init(&device->done_ee, &work_list); 4310500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 432b411b363SPhilipp Reisner 433a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 434b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 435b411b363SPhilipp Reisner 436b411b363SPhilipp Reisner /* possible callbacks here: 437d4dabbe2SLars Ellenberg * e_end_block, and e_end_resync_block, e_send_superseded. 438b411b363SPhilipp Reisner * all ignore the last argument. 
439b411b363SPhilipp Reisner */ 440a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 441e2b3032bSAndreas Gruenbacher int err2; 442e2b3032bSAndreas Gruenbacher 443b411b363SPhilipp Reisner /* list_del not necessary, next/prev members not touched */ 444a8cd15baSAndreas Gruenbacher err2 = peer_req->w.cb(&peer_req->w, !!err); 445e2b3032bSAndreas Gruenbacher if (!err) 446e2b3032bSAndreas Gruenbacher err = err2; 447b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 448b411b363SPhilipp Reisner } 449b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 450b411b363SPhilipp Reisner 451e2b3032bSAndreas Gruenbacher return err; 452b411b363SPhilipp Reisner } 453b411b363SPhilipp Reisner 454b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device, 455d4da1537SAndreas Gruenbacher struct list_head *head) 456b411b363SPhilipp Reisner { 457b411b363SPhilipp Reisner DEFINE_WAIT(wait); 458b411b363SPhilipp Reisner 459b411b363SPhilipp Reisner /* avoids spin_lock/unlock 460b411b363SPhilipp Reisner * and calling prepare_to_wait in the fast path */ 461b411b363SPhilipp Reisner while (!list_empty(head)) { 462b30ab791SAndreas Gruenbacher prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 4630500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4647eaceaccSJens Axboe io_schedule(); 465b30ab791SAndreas Gruenbacher finish_wait(&device->ee_wait, &wait); 4660500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 467b411b363SPhilipp Reisner } 468b411b363SPhilipp Reisner } 469b411b363SPhilipp Reisner 470b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device, 471d4da1537SAndreas Gruenbacher struct list_head *head) 472b411b363SPhilipp Reisner { 4730500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 474b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, head); 4750500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 476b411b363SPhilipp Reisner } 477b411b363SPhilipp Reisner 478dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 479b411b363SPhilipp Reisner { 480b411b363SPhilipp Reisner struct kvec iov = { 481b411b363SPhilipp Reisner .iov_base = buf, 482b411b363SPhilipp Reisner .iov_len = size, 483b411b363SPhilipp Reisner }; 484b411b363SPhilipp Reisner struct msghdr msg = { 485b411b363SPhilipp Reisner .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 486b411b363SPhilipp Reisner }; 487f730c848SAl Viro return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags); 488b411b363SPhilipp Reisner } 489b411b363SPhilipp Reisner 490bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) 491b411b363SPhilipp Reisner { 492b411b363SPhilipp Reisner int rv; 493b411b363SPhilipp Reisner 494bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->data.socket, buf, size, 0); 495b411b363SPhilipp Reisner 496b411b363SPhilipp Reisner if (rv < 0) { 497b411b363SPhilipp Reisner if (rv == -ECONNRESET) 4981ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was reset by peer\n"); 499b411b363SPhilipp Reisner else if (rv != -ERESTARTSYS) 5001ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 501b411b363SPhilipp Reisner } else if (rv == 0) { 502bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 503b66623e3SPhilipp Reisner long t; 504b66623e3SPhilipp Reisner rcu_read_lock(); 505bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 506b66623e3SPhilipp Reisner rcu_read_unlock(); 507b66623e3SPhilipp Reisner 508bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); 509b66623e3SPhilipp Reisner 510599377acSPhilipp Reisner if (t) 511599377acSPhilipp Reisner goto out; 
512599377acSPhilipp Reisner } 5131ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was shut down by peer\n"); 514599377acSPhilipp Reisner } 515599377acSPhilipp Reisner 516b411b363SPhilipp Reisner if (rv != size) 517bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 518b411b363SPhilipp Reisner 519599377acSPhilipp Reisner out: 520b411b363SPhilipp Reisner return rv; 521b411b363SPhilipp Reisner } 522b411b363SPhilipp Reisner 523bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) 524c6967746SAndreas Gruenbacher { 525c6967746SAndreas Gruenbacher int err; 526c6967746SAndreas Gruenbacher 527bde89a9eSAndreas Gruenbacher err = drbd_recv(connection, buf, size); 528c6967746SAndreas Gruenbacher if (err != size) { 529c6967746SAndreas Gruenbacher if (err >= 0) 530c6967746SAndreas Gruenbacher err = -EIO; 531c6967746SAndreas Gruenbacher } else 532c6967746SAndreas Gruenbacher err = 0; 533c6967746SAndreas Gruenbacher return err; 534c6967746SAndreas Gruenbacher } 535c6967746SAndreas Gruenbacher 536bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) 537a5c31904SAndreas Gruenbacher { 538a5c31904SAndreas Gruenbacher int err; 539a5c31904SAndreas Gruenbacher 540bde89a9eSAndreas Gruenbacher err = drbd_recv_all(connection, buf, size); 541a5c31904SAndreas Gruenbacher if (err && !signal_pending(current)) 5421ec861ebSAndreas Gruenbacher drbd_warn(connection, "short read (expected size %d)\n", (int)size); 543a5c31904SAndreas Gruenbacher return err; 544a5c31904SAndreas Gruenbacher } 545a5c31904SAndreas Gruenbacher 5465dbf1673SLars Ellenberg /* quoting tcp(7): 5475dbf1673SLars Ellenberg * On individual connections, the socket buffer size must be set prior to the 5485dbf1673SLars Ellenberg * listen(2) or connect(2) calls in order to have it take effect. 5495dbf1673SLars Ellenberg * This is our wrapper to do so. 
5505dbf1673SLars Ellenberg */ 5515dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd, 5525dbf1673SLars Ellenberg unsigned int rcv) 5535dbf1673SLars Ellenberg { 5545dbf1673SLars Ellenberg /* open coded SO_SNDBUF, SO_RCVBUF */ 5555dbf1673SLars Ellenberg if (snd) { 5565dbf1673SLars Ellenberg sock->sk->sk_sndbuf = snd; 5575dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5585dbf1673SLars Ellenberg } 5595dbf1673SLars Ellenberg if (rcv) { 5605dbf1673SLars Ellenberg sock->sk->sk_rcvbuf = rcv; 5615dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5625dbf1673SLars Ellenberg } 5635dbf1673SLars Ellenberg } 5645dbf1673SLars Ellenberg 565bde89a9eSAndreas Gruenbacher static struct socket *drbd_try_connect(struct drbd_connection *connection) 566b411b363SPhilipp Reisner { 567b411b363SPhilipp Reisner const char *what; 568b411b363SPhilipp Reisner struct socket *sock; 569b411b363SPhilipp Reisner struct sockaddr_in6 src_in6; 57044ed167dSPhilipp Reisner struct sockaddr_in6 peer_in6; 57144ed167dSPhilipp Reisner struct net_conf *nc; 57244ed167dSPhilipp Reisner int err, peer_addr_len, my_addr_len; 57369ef82deSAndreas Gruenbacher int sndbuf_size, rcvbuf_size, connect_int; 574b411b363SPhilipp Reisner int disconnect_on_error = 1; 575b411b363SPhilipp Reisner 57644ed167dSPhilipp Reisner rcu_read_lock(); 577bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 57844ed167dSPhilipp Reisner if (!nc) { 57944ed167dSPhilipp Reisner rcu_read_unlock(); 580b411b363SPhilipp Reisner return NULL; 58144ed167dSPhilipp Reisner } 58244ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 58344ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 58469ef82deSAndreas Gruenbacher connect_int = nc->connect_int; 585089c075dSAndreas Gruenbacher rcu_read_unlock(); 58644ed167dSPhilipp Reisner 587bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6)); 588bde89a9eSAndreas Gruenbacher 
memcpy(&src_in6, &connection->my_addr, my_addr_len); 58944ed167dSPhilipp Reisner 590bde89a9eSAndreas Gruenbacher if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) 59144ed167dSPhilipp Reisner src_in6.sin6_port = 0; 59244ed167dSPhilipp Reisner else 59344ed167dSPhilipp Reisner ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ 59444ed167dSPhilipp Reisner 595bde89a9eSAndreas Gruenbacher peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); 596bde89a9eSAndreas Gruenbacher memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); 597b411b363SPhilipp Reisner 598b411b363SPhilipp Reisner what = "sock_create_kern"; 59944ed167dSPhilipp Reisner err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, 600b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &sock); 601b411b363SPhilipp Reisner if (err < 0) { 602b411b363SPhilipp Reisner sock = NULL; 603b411b363SPhilipp Reisner goto out; 604b411b363SPhilipp Reisner } 605b411b363SPhilipp Reisner 606b411b363SPhilipp Reisner sock->sk->sk_rcvtimeo = 60769ef82deSAndreas Gruenbacher sock->sk->sk_sndtimeo = connect_int * HZ; 60844ed167dSPhilipp Reisner drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); 609b411b363SPhilipp Reisner 610b411b363SPhilipp Reisner /* explicitly bind to the configured IP as source IP 611b411b363SPhilipp Reisner * for the outgoing connections. 612b411b363SPhilipp Reisner * This is needed for multihomed hosts and to be 613b411b363SPhilipp Reisner * able to use lo: interfaces for drbd. 614b411b363SPhilipp Reisner * Make sure to use 0 as port number, so linux selects 615b411b363SPhilipp Reisner * a free one dynamically. 
616b411b363SPhilipp Reisner */ 617b411b363SPhilipp Reisner what = "bind before connect"; 61844ed167dSPhilipp Reisner err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); 619b411b363SPhilipp Reisner if (err < 0) 620b411b363SPhilipp Reisner goto out; 621b411b363SPhilipp Reisner 622b411b363SPhilipp Reisner /* connect may fail, peer not yet available. 623b411b363SPhilipp Reisner * stay C_WF_CONNECTION, don't go Disconnecting! */ 624b411b363SPhilipp Reisner disconnect_on_error = 0; 625b411b363SPhilipp Reisner what = "connect"; 62644ed167dSPhilipp Reisner err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); 627b411b363SPhilipp Reisner 628b411b363SPhilipp Reisner out: 629b411b363SPhilipp Reisner if (err < 0) { 630b411b363SPhilipp Reisner if (sock) { 631b411b363SPhilipp Reisner sock_release(sock); 632b411b363SPhilipp Reisner sock = NULL; 633b411b363SPhilipp Reisner } 634b411b363SPhilipp Reisner switch (-err) { 635b411b363SPhilipp Reisner /* timeout, busy, signal pending */ 636b411b363SPhilipp Reisner case ETIMEDOUT: case EAGAIN: case EINPROGRESS: 637b411b363SPhilipp Reisner case EINTR: case ERESTARTSYS: 638b411b363SPhilipp Reisner /* peer not (yet) available, network problem */ 639b411b363SPhilipp Reisner case ECONNREFUSED: case ENETUNREACH: 640b411b363SPhilipp Reisner case EHOSTDOWN: case EHOSTUNREACH: 641b411b363SPhilipp Reisner disconnect_on_error = 0; 642b411b363SPhilipp Reisner break; 643b411b363SPhilipp Reisner default: 6441ec861ebSAndreas Gruenbacher drbd_err(connection, "%s failed, err = %d\n", what, err); 645b411b363SPhilipp Reisner } 646b411b363SPhilipp Reisner if (disconnect_on_error) 647bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 648b411b363SPhilipp Reisner } 64944ed167dSPhilipp Reisner 650b411b363SPhilipp Reisner return sock; 651b411b363SPhilipp Reisner } 652b411b363SPhilipp Reisner 6537a426fd8SPhilipp Reisner struct accept_wait_data { 654bde89a9eSAndreas 
Gruenbacher struct drbd_connection *connection; 6557a426fd8SPhilipp Reisner struct socket *s_listen; 6567a426fd8SPhilipp Reisner struct completion door_bell; 6577a426fd8SPhilipp Reisner void (*original_sk_state_change)(struct sock *sk); 6587a426fd8SPhilipp Reisner 6597a426fd8SPhilipp Reisner }; 6607a426fd8SPhilipp Reisner 661715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk) 662b411b363SPhilipp Reisner { 6637a426fd8SPhilipp Reisner struct accept_wait_data *ad = sk->sk_user_data; 664715306f6SAndreas Gruenbacher void (*state_change)(struct sock *sk); 6657a426fd8SPhilipp Reisner 666715306f6SAndreas Gruenbacher state_change = ad->original_sk_state_change; 667715306f6SAndreas Gruenbacher if (sk->sk_state == TCP_ESTABLISHED) 6687a426fd8SPhilipp Reisner complete(&ad->door_bell); 669715306f6SAndreas Gruenbacher state_change(sk); 6707a426fd8SPhilipp Reisner } 6717a426fd8SPhilipp Reisner 672bde89a9eSAndreas Gruenbacher static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) 673b411b363SPhilipp Reisner { 6741f3e509bSPhilipp Reisner int err, sndbuf_size, rcvbuf_size, my_addr_len; 67544ed167dSPhilipp Reisner struct sockaddr_in6 my_addr; 6761f3e509bSPhilipp Reisner struct socket *s_listen; 67744ed167dSPhilipp Reisner struct net_conf *nc; 678b411b363SPhilipp Reisner const char *what; 679b411b363SPhilipp Reisner 68044ed167dSPhilipp Reisner rcu_read_lock(); 681bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 68244ed167dSPhilipp Reisner if (!nc) { 68344ed167dSPhilipp Reisner rcu_read_unlock(); 6847a426fd8SPhilipp Reisner return -EIO; 68544ed167dSPhilipp Reisner } 68644ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 68744ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 68844ed167dSPhilipp Reisner rcu_read_unlock(); 689b411b363SPhilipp Reisner 690bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); 691bde89a9eSAndreas 
Gruenbacher memcpy(&my_addr, &connection->my_addr, my_addr_len); 692b411b363SPhilipp Reisner 693b411b363SPhilipp Reisner what = "sock_create_kern"; 69444ed167dSPhilipp Reisner err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, 695b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &s_listen); 696b411b363SPhilipp Reisner if (err) { 697b411b363SPhilipp Reisner s_listen = NULL; 698b411b363SPhilipp Reisner goto out; 699b411b363SPhilipp Reisner } 700b411b363SPhilipp Reisner 7014a17fd52SPavel Emelyanov s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 70244ed167dSPhilipp Reisner drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); 703b411b363SPhilipp Reisner 704b411b363SPhilipp Reisner what = "bind before listen"; 70544ed167dSPhilipp Reisner err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); 706b411b363SPhilipp Reisner if (err < 0) 707b411b363SPhilipp Reisner goto out; 708b411b363SPhilipp Reisner 7097a426fd8SPhilipp Reisner ad->s_listen = s_listen; 7107a426fd8SPhilipp Reisner write_lock_bh(&s_listen->sk->sk_callback_lock); 7117a426fd8SPhilipp Reisner ad->original_sk_state_change = s_listen->sk->sk_state_change; 712715306f6SAndreas Gruenbacher s_listen->sk->sk_state_change = drbd_incoming_connection; 7137a426fd8SPhilipp Reisner s_listen->sk->sk_user_data = ad; 7147a426fd8SPhilipp Reisner write_unlock_bh(&s_listen->sk->sk_callback_lock); 715b411b363SPhilipp Reisner 7162820fd39SPhilipp Reisner what = "listen"; 7172820fd39SPhilipp Reisner err = s_listen->ops->listen(s_listen, 5); 7182820fd39SPhilipp Reisner if (err < 0) 7192820fd39SPhilipp Reisner goto out; 7202820fd39SPhilipp Reisner 7217a426fd8SPhilipp Reisner return 0; 722b411b363SPhilipp Reisner out: 723b411b363SPhilipp Reisner if (s_listen) 724b411b363SPhilipp Reisner sock_release(s_listen); 725b411b363SPhilipp Reisner if (err < 0) { 726b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7271ec861ebSAndreas Gruenbacher 
drbd_err(connection, "%s failed, err = %d\n", what, err); 728bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 729b411b363SPhilipp Reisner } 730b411b363SPhilipp Reisner } 7311f3e509bSPhilipp Reisner 7327a426fd8SPhilipp Reisner return -EIO; 7331f3e509bSPhilipp Reisner } 7341f3e509bSPhilipp Reisner 735715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) 736715306f6SAndreas Gruenbacher { 737715306f6SAndreas Gruenbacher write_lock_bh(&sk->sk_callback_lock); 738715306f6SAndreas Gruenbacher sk->sk_state_change = ad->original_sk_state_change; 739715306f6SAndreas Gruenbacher sk->sk_user_data = NULL; 740715306f6SAndreas Gruenbacher write_unlock_bh(&sk->sk_callback_lock); 741715306f6SAndreas Gruenbacher } 742715306f6SAndreas Gruenbacher 743bde89a9eSAndreas Gruenbacher static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) 7441f3e509bSPhilipp Reisner { 7451f3e509bSPhilipp Reisner int timeo, connect_int, err = 0; 7461f3e509bSPhilipp Reisner struct socket *s_estab = NULL; 7471f3e509bSPhilipp Reisner struct net_conf *nc; 7481f3e509bSPhilipp Reisner 7491f3e509bSPhilipp Reisner rcu_read_lock(); 750bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 7511f3e509bSPhilipp Reisner if (!nc) { 7521f3e509bSPhilipp Reisner rcu_read_unlock(); 7531f3e509bSPhilipp Reisner return NULL; 7541f3e509bSPhilipp Reisner } 7551f3e509bSPhilipp Reisner connect_int = nc->connect_int; 7561f3e509bSPhilipp Reisner rcu_read_unlock(); 7571f3e509bSPhilipp Reisner 7581f3e509bSPhilipp Reisner timeo = connect_int * HZ; 75938b682b2SAkinobu Mita /* 28.5% random jitter */ 76038b682b2SAkinobu Mita timeo += (prandom_u32() & 1) ? 
timeo / 7 : -timeo / 7; 7611f3e509bSPhilipp Reisner 7627a426fd8SPhilipp Reisner err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); 7637a426fd8SPhilipp Reisner if (err <= 0) 7647a426fd8SPhilipp Reisner return NULL; 7651f3e509bSPhilipp Reisner 7667a426fd8SPhilipp Reisner err = kernel_accept(ad->s_listen, &s_estab, 0); 767b411b363SPhilipp Reisner if (err < 0) { 768b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7691ec861ebSAndreas Gruenbacher drbd_err(connection, "accept failed, err = %d\n", err); 770bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 771b411b363SPhilipp Reisner } 772b411b363SPhilipp Reisner } 773b411b363SPhilipp Reisner 774715306f6SAndreas Gruenbacher if (s_estab) 775715306f6SAndreas Gruenbacher unregister_state_change(s_estab->sk, ad); 776b411b363SPhilipp Reisner 777b411b363SPhilipp Reisner return s_estab; 778b411b363SPhilipp Reisner } 779b411b363SPhilipp Reisner 780bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *); 781b411b363SPhilipp Reisner 782bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, 7839f5bdc33SAndreas Gruenbacher enum drbd_packet cmd) 7849f5bdc33SAndreas Gruenbacher { 785bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) 7869f5bdc33SAndreas Gruenbacher return -EIO; 787bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, cmd, 0, NULL, 0); 788b411b363SPhilipp Reisner } 789b411b363SPhilipp Reisner 790bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock) 791b411b363SPhilipp Reisner { 792bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 7939f5bdc33SAndreas Gruenbacher struct packet_info pi; 7944920e37aSPhilipp Reisner struct net_conf *nc; 7959f5bdc33SAndreas Gruenbacher 
int err; 796b411b363SPhilipp Reisner 7974920e37aSPhilipp Reisner rcu_read_lock(); 7984920e37aSPhilipp Reisner nc = rcu_dereference(connection->net_conf); 7994920e37aSPhilipp Reisner if (!nc) { 8004920e37aSPhilipp Reisner rcu_read_unlock(); 8014920e37aSPhilipp Reisner return -EIO; 8024920e37aSPhilipp Reisner } 8034920e37aSPhilipp Reisner sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; 8044920e37aSPhilipp Reisner rcu_read_unlock(); 8054920e37aSPhilipp Reisner 806bde89a9eSAndreas Gruenbacher err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 8079f5bdc33SAndreas Gruenbacher if (err != header_size) { 8089f5bdc33SAndreas Gruenbacher if (err >= 0) 8099f5bdc33SAndreas Gruenbacher err = -EIO; 8109f5bdc33SAndreas Gruenbacher return err; 8119f5bdc33SAndreas Gruenbacher } 812bde89a9eSAndreas Gruenbacher err = decode_header(connection, connection->data.rbuf, &pi); 8139f5bdc33SAndreas Gruenbacher if (err) 8149f5bdc33SAndreas Gruenbacher return err; 8159f5bdc33SAndreas Gruenbacher return pi.cmd; 816b411b363SPhilipp Reisner } 817b411b363SPhilipp Reisner 818b411b363SPhilipp Reisner /** 819b411b363SPhilipp Reisner * drbd_socket_okay() - Free the socket if its connection is not okay 820b411b363SPhilipp Reisner * @sock: pointer to the pointer to the socket. 
821b411b363SPhilipp Reisner */ 8225d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock) 823b411b363SPhilipp Reisner { 824b411b363SPhilipp Reisner int rr; 825b411b363SPhilipp Reisner char tb[4]; 826b411b363SPhilipp Reisner 827b411b363SPhilipp Reisner if (!*sock) 82881e84650SAndreas Gruenbacher return false; 829b411b363SPhilipp Reisner 830dbd9eea0SPhilipp Reisner rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 831b411b363SPhilipp Reisner 832b411b363SPhilipp Reisner if (rr > 0 || rr == -EAGAIN) { 83381e84650SAndreas Gruenbacher return true; 834b411b363SPhilipp Reisner } else { 835b411b363SPhilipp Reisner sock_release(*sock); 836b411b363SPhilipp Reisner *sock = NULL; 83781e84650SAndreas Gruenbacher return false; 838b411b363SPhilipp Reisner } 839b411b363SPhilipp Reisner } 8405d0b17f1SPhilipp Reisner 8415d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection, 8425d0b17f1SPhilipp Reisner struct socket **sock1, 8435d0b17f1SPhilipp Reisner struct socket **sock2) 8445d0b17f1SPhilipp Reisner { 8455d0b17f1SPhilipp Reisner struct net_conf *nc; 8465d0b17f1SPhilipp Reisner int timeout; 8475d0b17f1SPhilipp Reisner bool ok; 8485d0b17f1SPhilipp Reisner 8495d0b17f1SPhilipp Reisner if (!*sock1 || !*sock2) 8505d0b17f1SPhilipp Reisner return false; 8515d0b17f1SPhilipp Reisner 8525d0b17f1SPhilipp Reisner rcu_read_lock(); 8535d0b17f1SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 8545d0b17f1SPhilipp Reisner timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; 8555d0b17f1SPhilipp Reisner rcu_read_unlock(); 8565d0b17f1SPhilipp Reisner schedule_timeout_interruptible(timeout); 8575d0b17f1SPhilipp Reisner 8585d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock1); 8595d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock2) && ok; 8605d0b17f1SPhilipp Reisner 8615d0b17f1SPhilipp Reisner return ok; 8625d0b17f1SPhilipp Reisner } 8635d0b17f1SPhilipp Reisner 8642325eb66SPhilipp Reisner /* Gets called if a 
connection is established, or if a new minor gets created 8652325eb66SPhilipp Reisner in a connection */ 86669a22773SAndreas Gruenbacher int drbd_connected(struct drbd_peer_device *peer_device) 867907599e0SPhilipp Reisner { 86869a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 8690829f5edSAndreas Gruenbacher int err; 870907599e0SPhilipp Reisner 871b30ab791SAndreas Gruenbacher atomic_set(&device->packet_seq, 0); 872b30ab791SAndreas Gruenbacher device->peer_seq = 0; 873907599e0SPhilipp Reisner 87469a22773SAndreas Gruenbacher device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 87569a22773SAndreas Gruenbacher &peer_device->connection->cstate_mutex : 876b30ab791SAndreas Gruenbacher &device->own_state_mutex; 8778410da8fSPhilipp Reisner 87869a22773SAndreas Gruenbacher err = drbd_send_sync_param(peer_device); 8790829f5edSAndreas Gruenbacher if (!err) 88069a22773SAndreas Gruenbacher err = drbd_send_sizes(peer_device, 0, 0); 8810829f5edSAndreas Gruenbacher if (!err) 88269a22773SAndreas Gruenbacher err = drbd_send_uuids(peer_device); 8830829f5edSAndreas Gruenbacher if (!err) 88469a22773SAndreas Gruenbacher err = drbd_send_current_state(peer_device); 885b30ab791SAndreas Gruenbacher clear_bit(USE_DEGR_WFC_T, &device->flags); 886b30ab791SAndreas Gruenbacher clear_bit(RESIZE_PENDING, &device->flags); 887b30ab791SAndreas Gruenbacher atomic_set(&device->ap_in_flight, 0); 888b30ab791SAndreas Gruenbacher mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ 8890829f5edSAndreas Gruenbacher return err; 890907599e0SPhilipp Reisner } 891b411b363SPhilipp Reisner 892b411b363SPhilipp Reisner /* 893b411b363SPhilipp Reisner * return values: 894b411b363SPhilipp Reisner * 1 yes, we have a valid connection 895b411b363SPhilipp Reisner * 0 oops, did not work out, please try again 896b411b363SPhilipp Reisner * -1 peer talks different language, 897b411b363SPhilipp Reisner * no point in trying again, please go standalone. 
898b411b363SPhilipp Reisner * -2 We do not have a network config... 899b411b363SPhilipp Reisner */ 900bde89a9eSAndreas Gruenbacher static int conn_connect(struct drbd_connection *connection) 901b411b363SPhilipp Reisner { 9027da35862SPhilipp Reisner struct drbd_socket sock, msock; 903c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 90444ed167dSPhilipp Reisner struct net_conf *nc; 9055d0b17f1SPhilipp Reisner int vnr, timeout, h; 9065d0b17f1SPhilipp Reisner bool discard_my_data, ok; 907197296ffSPhilipp Reisner enum drbd_state_rv rv; 9087a426fd8SPhilipp Reisner struct accept_wait_data ad = { 909bde89a9eSAndreas Gruenbacher .connection = connection, 9107a426fd8SPhilipp Reisner .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), 9117a426fd8SPhilipp Reisner }; 912b411b363SPhilipp Reisner 913bde89a9eSAndreas Gruenbacher clear_bit(DISCONNECT_SENT, &connection->flags); 914bde89a9eSAndreas Gruenbacher if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) 915b411b363SPhilipp Reisner return -2; 916b411b363SPhilipp Reisner 9177da35862SPhilipp Reisner mutex_init(&sock.mutex); 918bde89a9eSAndreas Gruenbacher sock.sbuf = connection->data.sbuf; 919bde89a9eSAndreas Gruenbacher sock.rbuf = connection->data.rbuf; 9207da35862SPhilipp Reisner sock.socket = NULL; 9217da35862SPhilipp Reisner mutex_init(&msock.mutex); 922bde89a9eSAndreas Gruenbacher msock.sbuf = connection->meta.sbuf; 923bde89a9eSAndreas Gruenbacher msock.rbuf = connection->meta.rbuf; 9247da35862SPhilipp Reisner msock.socket = NULL; 9257da35862SPhilipp Reisner 9260916e0e3SAndreas Gruenbacher /* Assume that the peer only understands protocol 80 until we know better. 
*/ 927bde89a9eSAndreas Gruenbacher connection->agreed_pro_version = 80; 928b411b363SPhilipp Reisner 929bde89a9eSAndreas Gruenbacher if (prepare_listen_socket(connection, &ad)) 9307a426fd8SPhilipp Reisner return 0; 931b411b363SPhilipp Reisner 932b411b363SPhilipp Reisner do { 9332bf89621SAndreas Gruenbacher struct socket *s; 934b411b363SPhilipp Reisner 935bde89a9eSAndreas Gruenbacher s = drbd_try_connect(connection); 936b411b363SPhilipp Reisner if (s) { 9377da35862SPhilipp Reisner if (!sock.socket) { 9387da35862SPhilipp Reisner sock.socket = s; 939bde89a9eSAndreas Gruenbacher send_first_packet(connection, &sock, P_INITIAL_DATA); 9407da35862SPhilipp Reisner } else if (!msock.socket) { 941bde89a9eSAndreas Gruenbacher clear_bit(RESOLVE_CONFLICTS, &connection->flags); 9427da35862SPhilipp Reisner msock.socket = s; 943bde89a9eSAndreas Gruenbacher send_first_packet(connection, &msock, P_INITIAL_META); 944b411b363SPhilipp Reisner } else { 9451ec861ebSAndreas Gruenbacher drbd_err(connection, "Logic error in conn_connect()\n"); 946b411b363SPhilipp Reisner goto out_release_sockets; 947b411b363SPhilipp Reisner } 948b411b363SPhilipp Reisner } 949b411b363SPhilipp Reisner 9505d0b17f1SPhilipp Reisner if (connection_established(connection, &sock.socket, &msock.socket)) 951b411b363SPhilipp Reisner break; 952b411b363SPhilipp Reisner 953b411b363SPhilipp Reisner retry: 954bde89a9eSAndreas Gruenbacher s = drbd_wait_for_connect(connection, &ad); 955b411b363SPhilipp Reisner if (s) { 956bde89a9eSAndreas Gruenbacher int fp = receive_first_packet(connection, s); 9577da35862SPhilipp Reisner drbd_socket_okay(&sock.socket); 9587da35862SPhilipp Reisner drbd_socket_okay(&msock.socket); 95992f14951SPhilipp Reisner switch (fp) { 960e5d6f33aSAndreas Gruenbacher case P_INITIAL_DATA: 9617da35862SPhilipp Reisner if (sock.socket) { 9621ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet S crossed\n"); 9637da35862SPhilipp Reisner sock_release(sock.socket); 96480c6eed4SPhilipp Reisner 
sock.socket = s; 96580c6eed4SPhilipp Reisner goto randomize; 966b411b363SPhilipp Reisner } 9677da35862SPhilipp Reisner sock.socket = s; 968b411b363SPhilipp Reisner break; 969e5d6f33aSAndreas Gruenbacher case P_INITIAL_META: 970bde89a9eSAndreas Gruenbacher set_bit(RESOLVE_CONFLICTS, &connection->flags); 9717da35862SPhilipp Reisner if (msock.socket) { 9721ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet M crossed\n"); 9737da35862SPhilipp Reisner sock_release(msock.socket); 97480c6eed4SPhilipp Reisner msock.socket = s; 97580c6eed4SPhilipp Reisner goto randomize; 976b411b363SPhilipp Reisner } 9777da35862SPhilipp Reisner msock.socket = s; 978b411b363SPhilipp Reisner break; 979b411b363SPhilipp Reisner default: 9801ec861ebSAndreas Gruenbacher drbd_warn(connection, "Error receiving initial packet\n"); 981b411b363SPhilipp Reisner sock_release(s); 98280c6eed4SPhilipp Reisner randomize: 98338b682b2SAkinobu Mita if (prandom_u32() & 1) 984b411b363SPhilipp Reisner goto retry; 985b411b363SPhilipp Reisner } 986b411b363SPhilipp Reisner } 987b411b363SPhilipp Reisner 988bde89a9eSAndreas Gruenbacher if (connection->cstate <= C_DISCONNECTING) 989b411b363SPhilipp Reisner goto out_release_sockets; 990b411b363SPhilipp Reisner if (signal_pending(current)) { 991b411b363SPhilipp Reisner flush_signals(current); 992b411b363SPhilipp Reisner smp_rmb(); 993bde89a9eSAndreas Gruenbacher if (get_t_state(&connection->receiver) == EXITING) 994b411b363SPhilipp Reisner goto out_release_sockets; 995b411b363SPhilipp Reisner } 996b411b363SPhilipp Reisner 9975d0b17f1SPhilipp Reisner ok = connection_established(connection, &sock.socket, &msock.socket); 998b666dbf8SPhilipp Reisner } while (!ok); 999b411b363SPhilipp Reisner 10007a426fd8SPhilipp Reisner if (ad.s_listen) 10017a426fd8SPhilipp Reisner sock_release(ad.s_listen); 1002b411b363SPhilipp Reisner 100398683650SPhilipp Reisner sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 100498683650SPhilipp Reisner 
msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 1005b411b363SPhilipp Reisner 10067da35862SPhilipp Reisner sock.socket->sk->sk_allocation = GFP_NOIO; 10077da35862SPhilipp Reisner msock.socket->sk->sk_allocation = GFP_NOIO; 1008b411b363SPhilipp Reisner 10097da35862SPhilipp Reisner sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 10107da35862SPhilipp Reisner msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; 1011b411b363SPhilipp Reisner 1012b411b363SPhilipp Reisner /* NOT YET ... 1013bde89a9eSAndreas Gruenbacher * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10; 10147da35862SPhilipp Reisner * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 10156038178eSAndreas Gruenbacher * first set it to the P_CONNECTION_FEATURES timeout, 1016b411b363SPhilipp Reisner * which we set to 4x the configured ping_timeout. */ 101744ed167dSPhilipp Reisner rcu_read_lock(); 1018bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 1019b411b363SPhilipp Reisner 10207da35862SPhilipp Reisner sock.socket->sk->sk_sndtimeo = 10217da35862SPhilipp Reisner sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; 102244ed167dSPhilipp Reisner 10237da35862SPhilipp Reisner msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; 102444ed167dSPhilipp Reisner timeout = nc->timeout * HZ / 10; 102508b165baSPhilipp Reisner discard_my_data = nc->discard_my_data; 102644ed167dSPhilipp Reisner rcu_read_unlock(); 102744ed167dSPhilipp Reisner 10287da35862SPhilipp Reisner msock.socket->sk->sk_sndtimeo = timeout; 1029b411b363SPhilipp Reisner 1030b411b363SPhilipp Reisner /* we don't want delays. 
103125985edcSLucas De Marchi * we use TCP_CORK where appropriate, though */ 10327da35862SPhilipp Reisner drbd_tcp_nodelay(sock.socket); 10337da35862SPhilipp Reisner drbd_tcp_nodelay(msock.socket); 1034b411b363SPhilipp Reisner 1035bde89a9eSAndreas Gruenbacher connection->data.socket = sock.socket; 1036bde89a9eSAndreas Gruenbacher connection->meta.socket = msock.socket; 1037bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 1038b411b363SPhilipp Reisner 1039bde89a9eSAndreas Gruenbacher h = drbd_do_features(connection); 1040b411b363SPhilipp Reisner if (h <= 0) 1041b411b363SPhilipp Reisner return h; 1042b411b363SPhilipp Reisner 1043bde89a9eSAndreas Gruenbacher if (connection->cram_hmac_tfm) { 1044b30ab791SAndreas Gruenbacher /* drbd_request_state(device, NS(conn, WFAuth)); */ 1045bde89a9eSAndreas Gruenbacher switch (drbd_do_auth(connection)) { 1046b10d96cbSJohannes Thoma case -1: 10471ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed\n"); 1048b411b363SPhilipp Reisner return -1; 1049b10d96cbSJohannes Thoma case 0: 10501ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed, trying again.\n"); 1051b10d96cbSJohannes Thoma return 0; 1052b411b363SPhilipp Reisner } 1053b411b363SPhilipp Reisner } 1054b411b363SPhilipp Reisner 1055bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_sndtimeo = timeout; 1056bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1057b411b363SPhilipp Reisner 1058bde89a9eSAndreas Gruenbacher if (drbd_send_protocol(connection) == -EOPNOTSUPP) 10597e2455c1SPhilipp Reisner return -1; 10601e86ac48SPhilipp Reisner 106113c76abaSPhilipp Reisner /* Prevent a race between resync-handshake and 106213c76abaSPhilipp Reisner * being promoted to Primary. 
106313c76abaSPhilipp Reisner * 106413c76abaSPhilipp Reisner * Grab and release the state mutex, so we know that any current 106513c76abaSPhilipp Reisner * drbd_set_role() is finished, and any incoming drbd_set_role 106613c76abaSPhilipp Reisner * will see the STATE_SENT flag, and wait for it to be cleared. 106713c76abaSPhilipp Reisner */ 106831007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 106931007745SPhilipp Reisner mutex_lock(peer_device->device->state_mutex); 107031007745SPhilipp Reisner 107131007745SPhilipp Reisner set_bit(STATE_SENT, &connection->flags); 107231007745SPhilipp Reisner 107331007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 107431007745SPhilipp Reisner mutex_unlock(peer_device->device->state_mutex); 107531007745SPhilipp Reisner 107631007745SPhilipp Reisner rcu_read_lock(); 107731007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 107831007745SPhilipp Reisner struct drbd_device *device = peer_device->device; 107931007745SPhilipp Reisner kref_get(&device->kref); 108031007745SPhilipp Reisner rcu_read_unlock(); 108113c76abaSPhilipp Reisner 108208b165baSPhilipp Reisner if (discard_my_data) 1083b30ab791SAndreas Gruenbacher set_bit(DISCARD_MY_DATA, &device->flags); 108408b165baSPhilipp Reisner else 1085b30ab791SAndreas Gruenbacher clear_bit(DISCARD_MY_DATA, &device->flags); 108608b165baSPhilipp Reisner 108769a22773SAndreas Gruenbacher drbd_connected(peer_device); 108805a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 1089c141ebdaSPhilipp Reisner rcu_read_lock(); 1090c141ebdaSPhilipp Reisner } 1091c141ebdaSPhilipp Reisner rcu_read_unlock(); 1092c141ebdaSPhilipp Reisner 1093bde89a9eSAndreas Gruenbacher rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); 1094bde89a9eSAndreas Gruenbacher if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) { 1095bde89a9eSAndreas Gruenbacher 
clear_bit(STATE_SENT, &connection->flags); 10961e86ac48SPhilipp Reisner return 0; 1097a1096a6eSPhilipp Reisner } 10981e86ac48SPhilipp Reisner 1099bde89a9eSAndreas Gruenbacher drbd_thread_start(&connection->asender); 1100b411b363SPhilipp Reisner 11010500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 110208b165baSPhilipp Reisner /* The discard_my_data flag is a single-shot modifier to the next 110308b165baSPhilipp Reisner * connection attempt, the handshake of which is now well underway. 110408b165baSPhilipp Reisner * No need for rcu style copying of the whole struct 110508b165baSPhilipp Reisner * just to clear a single value. */ 1106bde89a9eSAndreas Gruenbacher connection->net_conf->discard_my_data = 0; 11070500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 110808b165baSPhilipp Reisner 1109d3fcb490SPhilipp Reisner return h; 1110b411b363SPhilipp Reisner 1111b411b363SPhilipp Reisner out_release_sockets: 11127a426fd8SPhilipp Reisner if (ad.s_listen) 11137a426fd8SPhilipp Reisner sock_release(ad.s_listen); 11147da35862SPhilipp Reisner if (sock.socket) 11157da35862SPhilipp Reisner sock_release(sock.socket); 11167da35862SPhilipp Reisner if (msock.socket) 11177da35862SPhilipp Reisner sock_release(msock.socket); 1118b411b363SPhilipp Reisner return -1; 1119b411b363SPhilipp Reisner } 1120b411b363SPhilipp Reisner 1121bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi) 1122b411b363SPhilipp Reisner { 1123bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 1124b411b363SPhilipp Reisner 11250c8e36d9SAndreas Gruenbacher if (header_size == sizeof(struct p_header100) && 11260c8e36d9SAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { 11270c8e36d9SAndreas Gruenbacher struct p_header100 *h = header; 11280c8e36d9SAndreas Gruenbacher if (h->pad != 0) { 11291ec861ebSAndreas Gruenbacher drbd_err(connection, 
"Header padding is not zero\n"); 11300c8e36d9SAndreas Gruenbacher return -EINVAL; 113102918be2SPhilipp Reisner } 11320c8e36d9SAndreas Gruenbacher pi->vnr = be16_to_cpu(h->volume); 11330c8e36d9SAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 11340c8e36d9SAndreas Gruenbacher pi->size = be32_to_cpu(h->length); 11350c8e36d9SAndreas Gruenbacher } else if (header_size == sizeof(struct p_header95) && 1136e658983aSAndreas Gruenbacher *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { 1137e658983aSAndreas Gruenbacher struct p_header95 *h = header; 1138e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1139b55d84baSAndreas Gruenbacher pi->size = be32_to_cpu(h->length); 1140eefc2f7dSPhilipp Reisner pi->vnr = 0; 1141e658983aSAndreas Gruenbacher } else if (header_size == sizeof(struct p_header80) && 1142e658983aSAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { 1143e658983aSAndreas Gruenbacher struct p_header80 *h = header; 1144e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1145e658983aSAndreas Gruenbacher pi->size = be16_to_cpu(h->length); 114677351055SPhilipp Reisner pi->vnr = 0; 114702918be2SPhilipp Reisner } else { 11481ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n", 1149e658983aSAndreas Gruenbacher be32_to_cpu(*(__be32 *)header), 1150bde89a9eSAndreas Gruenbacher connection->agreed_pro_version); 11518172f3e9SAndreas Gruenbacher return -EINVAL; 1152b411b363SPhilipp Reisner } 1153e658983aSAndreas Gruenbacher pi->data = header + header_size; 11548172f3e9SAndreas Gruenbacher return 0; 1155b411b363SPhilipp Reisner } 1156b411b363SPhilipp Reisner 1157bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi) 1158257d0af6SPhilipp Reisner { 1159bde89a9eSAndreas Gruenbacher void *buffer = connection->data.rbuf; 116069bc7bc3SAndreas Gruenbacher int err; 1161257d0af6SPhilipp Reisner 1162bde89a9eSAndreas Gruenbacher err = 
drbd_recv_all_warn(connection, buffer, drbd_header_size(connection)); 1163a5c31904SAndreas Gruenbacher if (err) 116469bc7bc3SAndreas Gruenbacher return err; 1165257d0af6SPhilipp Reisner 1166bde89a9eSAndreas Gruenbacher err = decode_header(connection, buffer, pi); 1167bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 1168b411b363SPhilipp Reisner 116969bc7bc3SAndreas Gruenbacher return err; 1170b411b363SPhilipp Reisner } 1171b411b363SPhilipp Reisner 1172bde89a9eSAndreas Gruenbacher static void drbd_flush(struct drbd_connection *connection) 1173b411b363SPhilipp Reisner { 1174b411b363SPhilipp Reisner int rv; 1175c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 11764b0007c0SPhilipp Reisner int vnr; 1177b411b363SPhilipp Reisner 1178e9526580SPhilipp Reisner if (connection->resource->write_ordering >= WO_bdev_flush) { 1179615e087fSLars Ellenberg rcu_read_lock(); 1180c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1181c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1182c06ece6bSAndreas Gruenbacher 1183b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 1184615e087fSLars Ellenberg continue; 1185b30ab791SAndreas Gruenbacher kref_get(&device->kref); 1186615e087fSLars Ellenberg rcu_read_unlock(); 11874b0007c0SPhilipp Reisner 1188b30ab791SAndreas Gruenbacher rv = blkdev_issue_flush(device->ldev->backing_bdev, 1189615e087fSLars Ellenberg GFP_NOIO, NULL); 1190b411b363SPhilipp Reisner if (rv) { 1191d0180171SAndreas Gruenbacher drbd_info(device, "local disk flush failed with status %d\n", rv); 1192b411b363SPhilipp Reisner /* would rather check on EOPNOTSUPP, but that is not reliable. 
1193b411b363SPhilipp Reisner * don't try again for ANY return value != 0 1194b411b363SPhilipp Reisner * if (rv == -EOPNOTSUPP) */ 11958fe39aacSPhilipp Reisner drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); 1196b411b363SPhilipp Reisner } 1197b30ab791SAndreas Gruenbacher put_ldev(device); 119805a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 1199615e087fSLars Ellenberg 1200615e087fSLars Ellenberg rcu_read_lock(); 1201615e087fSLars Ellenberg if (rv) 12024b0007c0SPhilipp Reisner break; 1203b411b363SPhilipp Reisner } 1204615e087fSLars Ellenberg rcu_read_unlock(); 1205b411b363SPhilipp Reisner } 1206b411b363SPhilipp Reisner } 1207b411b363SPhilipp Reisner 1208b411b363SPhilipp Reisner /** 1209b411b363SPhilipp Reisner * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. 1210b30ab791SAndreas Gruenbacher * @device: DRBD device. 1211b411b363SPhilipp Reisner * @epoch: Epoch object. 1212b411b363SPhilipp Reisner * @ev: Epoch event. 
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* Apply the event proper; EV_CLEANUP is a modifier flag,
		 * not an event of its own. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* An epoch can be finished once it contains requests, none of
		 * them is still active, and we either received its barrier
		 * number or are cleaning up anyway. */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* Drop epoch_lock while sending the barrier
				 * ack over the network. */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* Finished epoch is not the newest one:
				 * unlink and free it, then re-evaluate its
				 * successor (EV_BECAME_LAST) in the next
				 * loop iteration. */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* The current epoch is reset and reused
				 * in place. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}

/* Clamp the requested write ordering method to what this backing device's
 * configuration permits (disk_flushes / disk_drain knobs).  Must run under
 * the RCU read lock for the disk_conf dereference. */
static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	/* Demote step by step: no flushes allowed -> drain,
	 * no drain allowed -> none. */
	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;

	return wo;
}

/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	backing device to take into account even when it is not (or
 *		not any more) reachable through @resource's device idr;
 *		may be NULL.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	/* WO_bdev_flush is an explicit upgrade attempt; any other request
	 * may only lower, never raise, the current method. */
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev has been covered by this device already. */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	/* @bdev was not among the attached devices above: honor its
	 * configuration explicitly. */
	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

/**
 * drbd_submit_peer_request() - submit a peer request to the local backing device
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:	flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set.
 */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of allocated bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* Trim emulated by zero-out: handled synchronously here, without
	 * building any bios ourselves. */
	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* Prepend to the chain; all bios are submitted together below. */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		/* No pages to add for a discard, just the size. */
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* Current bio is full; start another one for the
			 * remaining pages. */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	/* The completion handler uses pending_bios to detect when the last
	 * chained bio finished. */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* Release every bio allocated so far; none was submitted. */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
Gruenbacher struct drbd_interval *i = &peer_req->i; 146353840641SAndreas Gruenbacher 1464b30ab791SAndreas Gruenbacher drbd_remove_interval(&device->write_requests, i); 146553840641SAndreas Gruenbacher drbd_clear_interval(i); 146653840641SAndreas Gruenbacher 14676c852becSAndreas Gruenbacher /* Wake up any processes waiting for this peer request to complete. */ 146853840641SAndreas Gruenbacher if (i->waiting) 1469b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 147053840641SAndreas Gruenbacher } 147153840641SAndreas Gruenbacher 1472bde89a9eSAndreas Gruenbacher static void conn_wait_active_ee_empty(struct drbd_connection *connection) 147377fede51SPhilipp Reisner { 1474c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 147577fede51SPhilipp Reisner int vnr; 147677fede51SPhilipp Reisner 147777fede51SPhilipp Reisner rcu_read_lock(); 1478c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1479c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1480c06ece6bSAndreas Gruenbacher 1481b30ab791SAndreas Gruenbacher kref_get(&device->kref); 148277fede51SPhilipp Reisner rcu_read_unlock(); 1483b30ab791SAndreas Gruenbacher drbd_wait_ee_list_empty(device, &device->active_ee); 148405a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 148577fede51SPhilipp Reisner rcu_read_lock(); 148677fede51SPhilipp Reisner } 148777fede51SPhilipp Reisner rcu_read_unlock(); 148877fede51SPhilipp Reisner } 148977fede51SPhilipp Reisner 14909f4fe9adSAndreas Gruenbacher static struct drbd_peer_device * 14919f4fe9adSAndreas Gruenbacher conn_peer_device(struct drbd_connection *connection, int volume_number) 14929f4fe9adSAndreas Gruenbacher { 14939f4fe9adSAndreas Gruenbacher return idr_find(&connection->peer_devices, volume_number); 14949f4fe9adSAndreas Gruenbacher } 14959f4fe9adSAndreas Gruenbacher 1496bde89a9eSAndreas Gruenbacher static int receive_Barrier(struct drbd_connection *connection, 
			   struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* Drain (and, policy permitting, flush) before the epoch
		 * can be considered closed. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		/* NOTE(review): the message text says "connection->write_ordering"
		 * but the value printed is resource->write_ordering. */
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* We got a fresh epoch object; initialize it and try to install it
	 * as the new current epoch. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * Reads one data block -- optionally preceded by an integrity digest --
 * from the connection into a newly allocated peer request.
 * Returns NULL on any receive, validation, or allocation failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	/* Receive the peer's digest first, if integrity checking is
	 * configured (not applicable to trim requests). */
	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
	 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	/* Trim packets carry no payload; the affected size comes from the
	 * trim sub-header instead. */
	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Nothing more to receive for a trim. */
	if (trim)
		return peer_req;

	/* Receive the payload page by page into the peer request. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* Verify the received payload against the peer's digest. */
	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 * Returns 0 on success, or the error from the receive path.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	/* One scratch page is reused for every chunk we throw away. */
	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

/* Receive the payload of a disk-less read reply directly into the pages
 * of the request's master bio, verifying the integrity digest if one is
 * configured. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* Receive the peer's digest first, if integrity checking is
	 * configured. */
	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* Fill the master bio's segments straight from the socket. */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	/* Verify what we received against the peer's digest. */
	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 * Acks (or nacks) a completed resync write back to the peer and marks
 * the affected range in sync / failed accordingly.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* Matches the inc_unacked() done when the request was queued. */
	dec_unacked(device);

	return err;
}

/* Receive one resync data block and submit it as a local write.
 * On success the write completes asynchronously via e_end_resync_block();
 * on any failure the ldev reference held by the caller is dropped and
 * -EIO is returned (triggering a re-connect). */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* Undo the queueing done above before freeing the request. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

/* Map a block_id received from the peer back to our request object.
 * The id IS the kernel pointer of the request we sent out; it is
 * validated by looking it up in @root before being trusted. */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
18274a76b161SAndreas Gruenbacher return -EIO; 18289f4fe9adSAndreas Gruenbacher device = peer_device->device; 1829b411b363SPhilipp Reisner 1830b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 1831b411b363SPhilipp Reisner 18320500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1833b30ab791SAndreas Gruenbacher req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__); 18340500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1835c3afd8f5SAndreas Gruenbacher if (unlikely(!req)) 183682bc0194SAndreas Gruenbacher return -EIO; 1837b411b363SPhilipp Reisner 183824c4830cSBart Van Assche /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid 1839b411b363SPhilipp Reisner * special casing it there for the various failure cases. 1840b411b363SPhilipp Reisner * still no race with drbd_fail_pending_reads */ 184169a22773SAndreas Gruenbacher err = recv_dless_read(peer_device, req, sector, pi->size); 184282bc0194SAndreas Gruenbacher if (!err) 18438554df1cSAndreas Gruenbacher req_mod(req, DATA_RECEIVED); 1844b411b363SPhilipp Reisner /* else: nothing. handled from drbd_disconnect... 
1845b411b363SPhilipp Reisner * I don't think we may complete this just yet 1846b411b363SPhilipp Reisner * in case we are "on-disconnect: freeze" */ 1847b411b363SPhilipp Reisner 184882bc0194SAndreas Gruenbacher return err; 1849b411b363SPhilipp Reisner } 1850b411b363SPhilipp Reisner 1851bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi) 1852b411b363SPhilipp Reisner { 18539f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 1854b30ab791SAndreas Gruenbacher struct drbd_device *device; 1855b411b363SPhilipp Reisner sector_t sector; 185682bc0194SAndreas Gruenbacher int err; 1857e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 18584a76b161SAndreas Gruenbacher 18599f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 18609f4fe9adSAndreas Gruenbacher if (!peer_device) 18614a76b161SAndreas Gruenbacher return -EIO; 18629f4fe9adSAndreas Gruenbacher device = peer_device->device; 1863b411b363SPhilipp Reisner 1864b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 18650b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->block_id == ID_SYNCER); 1866b411b363SPhilipp Reisner 1867b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1868b411b363SPhilipp Reisner /* data is submitted to disk within recv_resync_read. 1869b411b363SPhilipp Reisner * corresponding put_ldev done below on error, 1870fcefa62eSAndreas Gruenbacher * or in drbd_peer_request_endio. 
*/ 1871a0fb3c47SLars Ellenberg err = recv_resync_read(peer_device, sector, pi); 1872b411b363SPhilipp Reisner } else { 1873b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1874d0180171SAndreas Gruenbacher drbd_err(device, "Can not write resync data to local disk.\n"); 1875b411b363SPhilipp Reisner 187669a22773SAndreas Gruenbacher err = drbd_drain_block(peer_device, pi->size); 1877b411b363SPhilipp Reisner 187869a22773SAndreas Gruenbacher drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 1879b411b363SPhilipp Reisner } 1880b411b363SPhilipp Reisner 1881b30ab791SAndreas Gruenbacher atomic_add(pi->size >> 9, &device->rs_sect_in); 1882778f271dSPhilipp Reisner 188382bc0194SAndreas Gruenbacher return err; 1884b411b363SPhilipp Reisner } 1885b411b363SPhilipp Reisner 1886b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device, 18877be8da07SAndreas Gruenbacher sector_t sector, int size) 1888b411b363SPhilipp Reisner { 18897be8da07SAndreas Gruenbacher struct drbd_interval *i; 18907be8da07SAndreas Gruenbacher struct drbd_request *req; 1891b411b363SPhilipp Reisner 1892b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 18937be8da07SAndreas Gruenbacher if (!i->local) 18947be8da07SAndreas Gruenbacher continue; 18957be8da07SAndreas Gruenbacher req = container_of(i, struct drbd_request, i); 18967be8da07SAndreas Gruenbacher if (req->rq_state & RQ_LOCAL_PENDING || 18977be8da07SAndreas Gruenbacher !(req->rq_state & RQ_POSTPONED)) 18987be8da07SAndreas Gruenbacher continue; 18992312f0b3SLars Ellenberg /* as it is RQ_POSTPONED, this will cause it to 19002312f0b3SLars Ellenberg * be queued on the retry workqueue. 
*/ 1901d4dabbe2SLars Ellenberg __req_mod(req, CONFLICT_RESOLVED, NULL); 19027be8da07SAndreas Gruenbacher } 19037be8da07SAndreas Gruenbacher } 19047be8da07SAndreas Gruenbacher 1905a990be46SAndreas Gruenbacher /* 1906a990be46SAndreas Gruenbacher * e_end_block() is called in asender context via drbd_finish_peer_reqs(). 1907b411b363SPhilipp Reisner */ 190899920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *w, int cancel) 1909b411b363SPhilipp Reisner { 19108050e6d0SAndreas Gruenbacher struct drbd_peer_request *peer_req = 1911a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 1912a8cd15baSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1913a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1914db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 191599920dc5SAndreas Gruenbacher int err = 0, pcmd; 1916b411b363SPhilipp Reisner 1917303d1448SPhilipp Reisner if (peer_req->flags & EE_SEND_WRITE_ACK) { 1918db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1919b30ab791SAndreas Gruenbacher pcmd = (device->state.conn >= C_SYNC_SOURCE && 1920b30ab791SAndreas Gruenbacher device->state.conn <= C_PAUSED_SYNC_T && 1921db830c46SAndreas Gruenbacher peer_req->flags & EE_MAY_SET_IN_SYNC) ? 1922b411b363SPhilipp Reisner P_RS_WRITE_ACK : P_WRITE_ACK; 1923a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, pcmd, peer_req); 1924b411b363SPhilipp Reisner if (pcmd == P_RS_WRITE_ACK) 1925b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, sector, peer_req->i.size); 1926b411b363SPhilipp Reisner } else { 1927a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 1928b411b363SPhilipp Reisner /* we expect it to be marked out of sync anyways... 1929b411b363SPhilipp Reisner * maybe assert this? 
*/ 1930b411b363SPhilipp Reisner } 1931b30ab791SAndreas Gruenbacher dec_unacked(device); 1932b411b363SPhilipp Reisner } 193308d0dabfSLars Ellenberg 1934b411b363SPhilipp Reisner /* we delete from the conflict detection hash _after_ we sent out the 1935b411b363SPhilipp Reisner * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ 1936302bdeaeSPhilipp Reisner if (peer_req->flags & EE_IN_INTERVAL_TREE) { 19370500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 19380b0ba1efSAndreas Gruenbacher D_ASSERT(device, !drbd_interval_empty(&peer_req->i)); 1939b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 19407be8da07SAndreas Gruenbacher if (peer_req->flags & EE_RESTART_REQUESTS) 1941b30ab791SAndreas Gruenbacher restart_conflicting_writes(device, sector, peer_req->i.size); 19420500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1943bb3bfe96SAndreas Gruenbacher } else 19440b0ba1efSAndreas Gruenbacher D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 1945b411b363SPhilipp Reisner 1946a6b32bc3SAndreas Gruenbacher drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); 1947b411b363SPhilipp Reisner 194899920dc5SAndreas Gruenbacher return err; 1949b411b363SPhilipp Reisner } 1950b411b363SPhilipp Reisner 1951a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) 1952b411b363SPhilipp Reisner { 19538050e6d0SAndreas Gruenbacher struct drbd_peer_request *peer_req = 1954a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 1955a8cd15baSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 195699920dc5SAndreas Gruenbacher int err; 1957b411b363SPhilipp Reisner 1958a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, ack, peer_req); 1959a8cd15baSAndreas Gruenbacher dec_unacked(peer_device->device); 1960b411b363SPhilipp Reisner 196199920dc5SAndreas Gruenbacher return err; 1962b411b363SPhilipp Reisner } 1963b411b363SPhilipp Reisner 1964d4dabbe2SLars Ellenberg static int e_send_superseded(struct drbd_work *w, int unused) 1965b6a370baSPhilipp Reisner { 1966a8cd15baSAndreas Gruenbacher return e_send_ack(w, P_SUPERSEDED); 19677be8da07SAndreas Gruenbacher } 1968b6a370baSPhilipp Reisner 196999920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused) 19707be8da07SAndreas Gruenbacher { 1971a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = 1972a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 1973a8cd15baSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection; 19747be8da07SAndreas Gruenbacher 1975a8cd15baSAndreas Gruenbacher return e_send_ack(w, connection->agreed_pro_version >= 100 ? 1976d4dabbe2SLars Ellenberg P_RETRY_WRITE : P_SUPERSEDED); 19777be8da07SAndreas Gruenbacher } 19787be8da07SAndreas Gruenbacher 19793e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b) 19803e394da1SAndreas Gruenbacher { 19813e394da1SAndreas Gruenbacher /* 19823e394da1SAndreas Gruenbacher * We assume 32-bit wrap-around here. 
19833e394da1SAndreas Gruenbacher * For 24-bit wrap-around, we would have to shift: 19843e394da1SAndreas Gruenbacher * a <<= 8; b <<= 8; 19853e394da1SAndreas Gruenbacher */ 19863e394da1SAndreas Gruenbacher return (s32)a - (s32)b > 0; 19873e394da1SAndreas Gruenbacher } 19883e394da1SAndreas Gruenbacher 19893e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b) 19903e394da1SAndreas Gruenbacher { 19913e394da1SAndreas Gruenbacher return seq_greater(a, b) ? a : b; 19923e394da1SAndreas Gruenbacher } 19933e394da1SAndreas Gruenbacher 199469a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq) 19953e394da1SAndreas Gruenbacher { 199669a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 19973c13b680SLars Ellenberg unsigned int newest_peer_seq; 19983e394da1SAndreas Gruenbacher 199969a22773SAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) { 2000b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 2001b30ab791SAndreas Gruenbacher newest_peer_seq = seq_max(device->peer_seq, peer_seq); 2002b30ab791SAndreas Gruenbacher device->peer_seq = newest_peer_seq; 2003b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 2004b30ab791SAndreas Gruenbacher /* wake up only if we actually changed device->peer_seq */ 20053c13b680SLars Ellenberg if (peer_seq == newest_peer_seq) 2006b30ab791SAndreas Gruenbacher wake_up(&device->seq_wait); 20073e394da1SAndreas Gruenbacher } 20087be8da07SAndreas Gruenbacher } 20093e394da1SAndreas Gruenbacher 2010d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) 2011d93f6302SLars Ellenberg { 2012d93f6302SLars Ellenberg return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); 2013d93f6302SLars Ellenberg } 2014d93f6302SLars Ellenberg 2015d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? 
*/ 2016b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) 2017d93f6302SLars Ellenberg { 2018d93f6302SLars Ellenberg struct drbd_peer_request *rs_req; 2019b6a370baSPhilipp Reisner bool rv = 0; 2020b6a370baSPhilipp Reisner 20210500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2022a8cd15baSAndreas Gruenbacher list_for_each_entry(rs_req, &device->sync_ee, w.list) { 2023d93f6302SLars Ellenberg if (overlaps(peer_req->i.sector, peer_req->i.size, 2024d93f6302SLars Ellenberg rs_req->i.sector, rs_req->i.size)) { 2025b6a370baSPhilipp Reisner rv = 1; 2026b6a370baSPhilipp Reisner break; 2027b6a370baSPhilipp Reisner } 2028b6a370baSPhilipp Reisner } 20290500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2030b6a370baSPhilipp Reisner 2031b6a370baSPhilipp Reisner return rv; 2032b6a370baSPhilipp Reisner } 2033b6a370baSPhilipp Reisner 2034b411b363SPhilipp Reisner /* Called from receive_Data. 2035b411b363SPhilipp Reisner * Synchronize packets on sock with packets on msock. 2036b411b363SPhilipp Reisner * 2037b411b363SPhilipp Reisner * This is here so even when a P_DATA packet traveling via sock overtook an Ack 2038b411b363SPhilipp Reisner * packet traveling on msock, they are still processed in the order they have 2039b411b363SPhilipp Reisner * been sent. 2040b411b363SPhilipp Reisner * 2041b411b363SPhilipp Reisner * Note: we don't care for Ack packets overtaking P_DATA packets. 2042b411b363SPhilipp Reisner * 2043b30ab791SAndreas Gruenbacher * In case packet_seq is larger than device->peer_seq number, there are 2044b411b363SPhilipp Reisner * outstanding packets on the msock. We wait for them to arrive. 2045b30ab791SAndreas Gruenbacher * In case we are the logically next packet, we update device->peer_seq 2046b411b363SPhilipp Reisner * ourselves. Correctly handles 32bit wrap around. 
2047b411b363SPhilipp Reisner * 2048b411b363SPhilipp Reisner * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, 2049b411b363SPhilipp Reisner * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds 2050b411b363SPhilipp Reisner * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have 2051b411b363SPhilipp Reisner * 1<<9 == 512 seconds aka ages for the 32bit wrap around... 2052b411b363SPhilipp Reisner * 2053b411b363SPhilipp Reisner * returns 0 if we may process the packet, 2054b411b363SPhilipp Reisner * -ERESTARTSYS if we were interrupted (by disconnect signal). */ 205569a22773SAndreas Gruenbacher static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq) 2056b411b363SPhilipp Reisner { 205769a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 2058b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2059b411b363SPhilipp Reisner long timeout; 2060b874d231SPhilipp Reisner int ret = 0, tp; 20617be8da07SAndreas Gruenbacher 206269a22773SAndreas Gruenbacher if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) 20637be8da07SAndreas Gruenbacher return 0; 20647be8da07SAndreas Gruenbacher 2065b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 2066b411b363SPhilipp Reisner for (;;) { 2067b30ab791SAndreas Gruenbacher if (!seq_greater(peer_seq - 1, device->peer_seq)) { 2068b30ab791SAndreas Gruenbacher device->peer_seq = seq_max(device->peer_seq, peer_seq); 2069b411b363SPhilipp Reisner break; 20707be8da07SAndreas Gruenbacher } 2071b874d231SPhilipp Reisner 2072b411b363SPhilipp Reisner if (signal_pending(current)) { 2073b411b363SPhilipp Reisner ret = -ERESTARTSYS; 2074b411b363SPhilipp Reisner break; 2075b411b363SPhilipp Reisner } 2076b874d231SPhilipp Reisner 2077b874d231SPhilipp Reisner rcu_read_lock(); 2078a6b32bc3SAndreas Gruenbacher tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries; 2079b874d231SPhilipp 
Reisner rcu_read_unlock(); 2080b874d231SPhilipp Reisner 2081b874d231SPhilipp Reisner if (!tp) 2082b874d231SPhilipp Reisner break; 2083b874d231SPhilipp Reisner 2084b874d231SPhilipp Reisner /* Only need to wait if two_primaries is enabled */ 2085b30ab791SAndreas Gruenbacher prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE); 2086b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 208744ed167dSPhilipp Reisner rcu_read_lock(); 208869a22773SAndreas Gruenbacher timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10; 208944ed167dSPhilipp Reisner rcu_read_unlock(); 209071b1c1ebSAndreas Gruenbacher timeout = schedule_timeout(timeout); 2091b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 20927be8da07SAndreas Gruenbacher if (!timeout) { 2093b411b363SPhilipp Reisner ret = -ETIMEDOUT; 2094d0180171SAndreas Gruenbacher drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n"); 2095b411b363SPhilipp Reisner break; 2096b411b363SPhilipp Reisner } 2097b411b363SPhilipp Reisner } 2098b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 2099b30ab791SAndreas Gruenbacher finish_wait(&device->seq_wait, &wait); 2100b411b363SPhilipp Reisner return ret; 2101b411b363SPhilipp Reisner } 2102b411b363SPhilipp Reisner 2103688593c5SLars Ellenberg /* see also bio_flags_to_wire() 2104688593c5SLars Ellenberg * DRBD_REQ_*, because we need to semantically map the flags to data packet 2105688593c5SLars Ellenberg * flags and back. We may replicate to other kernel versions. */ 210681f0ffd2SAndreas Gruenbacher static unsigned long wire_flags_to_bio(u32 dpf) 210776d2e7ecSPhilipp Reisner { 210876d2e7ecSPhilipp Reisner return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 210976d2e7ecSPhilipp Reisner (dpf & DP_FUA ? REQ_FUA : 0) | 2110688593c5SLars Ellenberg (dpf & DP_FLUSH ? REQ_FLUSH : 0) | 211176d2e7ecSPhilipp Reisner (dpf & DP_DISCARD ? 
REQ_DISCARD : 0); 211276d2e7ecSPhilipp Reisner } 211376d2e7ecSPhilipp Reisner 2114b30ab791SAndreas Gruenbacher static void fail_postponed_requests(struct drbd_device *device, sector_t sector, 21157be8da07SAndreas Gruenbacher unsigned int size) 2116b411b363SPhilipp Reisner { 21177be8da07SAndreas Gruenbacher struct drbd_interval *i; 21187be8da07SAndreas Gruenbacher 21197be8da07SAndreas Gruenbacher repeat: 2120b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 21217be8da07SAndreas Gruenbacher struct drbd_request *req; 21227be8da07SAndreas Gruenbacher struct bio_and_error m; 21237be8da07SAndreas Gruenbacher 21247be8da07SAndreas Gruenbacher if (!i->local) 21257be8da07SAndreas Gruenbacher continue; 21267be8da07SAndreas Gruenbacher req = container_of(i, struct drbd_request, i); 21277be8da07SAndreas Gruenbacher if (!(req->rq_state & RQ_POSTPONED)) 21287be8da07SAndreas Gruenbacher continue; 21297be8da07SAndreas Gruenbacher req->rq_state &= ~RQ_POSTPONED; 21307be8da07SAndreas Gruenbacher __req_mod(req, NEG_ACKED, &m); 21310500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 21327be8da07SAndreas Gruenbacher if (m.bio) 2133b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 21340500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 21357be8da07SAndreas Gruenbacher goto repeat; 21367be8da07SAndreas Gruenbacher } 21377be8da07SAndreas Gruenbacher } 21387be8da07SAndreas Gruenbacher 2139b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device, 21407be8da07SAndreas Gruenbacher struct drbd_peer_request *peer_req) 21417be8da07SAndreas Gruenbacher { 2142e33b32deSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection; 2143bde89a9eSAndreas Gruenbacher bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags); 21447be8da07SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 21457be8da07SAndreas Gruenbacher 
const unsigned int size = peer_req->i.size; 21467be8da07SAndreas Gruenbacher struct drbd_interval *i; 21477be8da07SAndreas Gruenbacher bool equal; 21487be8da07SAndreas Gruenbacher int err; 21497be8da07SAndreas Gruenbacher 21507be8da07SAndreas Gruenbacher /* 21517be8da07SAndreas Gruenbacher * Inserting the peer request into the write_requests tree will prevent 21527be8da07SAndreas Gruenbacher * new conflicting local requests from being added. 21537be8da07SAndreas Gruenbacher */ 2154b30ab791SAndreas Gruenbacher drbd_insert_interval(&device->write_requests, &peer_req->i); 21557be8da07SAndreas Gruenbacher 21567be8da07SAndreas Gruenbacher repeat: 2157b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 21587be8da07SAndreas Gruenbacher if (i == &peer_req->i) 21597be8da07SAndreas Gruenbacher continue; 216008d0dabfSLars Ellenberg if (i->completed) 216108d0dabfSLars Ellenberg continue; 21627be8da07SAndreas Gruenbacher 21637be8da07SAndreas Gruenbacher if (!i->local) { 21647be8da07SAndreas Gruenbacher /* 21657be8da07SAndreas Gruenbacher * Our peer has sent a conflicting remote request; this 21667be8da07SAndreas Gruenbacher * should not happen in a two-node setup. Wait for the 21677be8da07SAndreas Gruenbacher * earlier peer request to complete. 
21687be8da07SAndreas Gruenbacher */ 2169b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, i); 21707be8da07SAndreas Gruenbacher if (err) 21717be8da07SAndreas Gruenbacher goto out; 21727be8da07SAndreas Gruenbacher goto repeat; 21737be8da07SAndreas Gruenbacher } 21747be8da07SAndreas Gruenbacher 21757be8da07SAndreas Gruenbacher equal = i->sector == sector && i->size == size; 21767be8da07SAndreas Gruenbacher if (resolve_conflicts) { 21777be8da07SAndreas Gruenbacher /* 21787be8da07SAndreas Gruenbacher * If the peer request is fully contained within the 2179d4dabbe2SLars Ellenberg * overlapping request, it can be considered overwritten 2180d4dabbe2SLars Ellenberg * and thus superseded; otherwise, it will be retried 2181d4dabbe2SLars Ellenberg * once all overlapping requests have completed. 21827be8da07SAndreas Gruenbacher */ 2183d4dabbe2SLars Ellenberg bool superseded = i->sector <= sector && i->sector + 21847be8da07SAndreas Gruenbacher (i->size >> 9) >= sector + (size >> 9); 21857be8da07SAndreas Gruenbacher 21867be8da07SAndreas Gruenbacher if (!equal) 2187d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 21887be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u, " 21897be8da07SAndreas Gruenbacher "assuming %s came first\n", 21907be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 21917be8da07SAndreas Gruenbacher (unsigned long long)sector, size, 2192d4dabbe2SLars Ellenberg superseded ? "local" : "remote"); 21937be8da07SAndreas Gruenbacher 2194b30ab791SAndreas Gruenbacher inc_unacked(device); 2195a8cd15baSAndreas Gruenbacher peer_req->w.cb = superseded ? 
e_send_superseded : 21967be8da07SAndreas Gruenbacher e_send_retry_write; 2197a8cd15baSAndreas Gruenbacher list_add_tail(&peer_req->w.list, &device->done_ee); 2198e33b32deSAndreas Gruenbacher wake_asender(connection); 21997be8da07SAndreas Gruenbacher 22007be8da07SAndreas Gruenbacher err = -ENOENT; 22017be8da07SAndreas Gruenbacher goto out; 22027be8da07SAndreas Gruenbacher } else { 22037be8da07SAndreas Gruenbacher struct drbd_request *req = 22047be8da07SAndreas Gruenbacher container_of(i, struct drbd_request, i); 22057be8da07SAndreas Gruenbacher 22067be8da07SAndreas Gruenbacher if (!equal) 2207d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 22087be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u\n", 22097be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 22107be8da07SAndreas Gruenbacher (unsigned long long)sector, size); 22117be8da07SAndreas Gruenbacher 22127be8da07SAndreas Gruenbacher if (req->rq_state & RQ_LOCAL_PENDING || 22137be8da07SAndreas Gruenbacher !(req->rq_state & RQ_POSTPONED)) { 22147be8da07SAndreas Gruenbacher /* 22157be8da07SAndreas Gruenbacher * Wait for the node with the discard flag to 2216d4dabbe2SLars Ellenberg * decide if this request has been superseded 2217d4dabbe2SLars Ellenberg * or needs to be retried. 2218d4dabbe2SLars Ellenberg * Requests that have been superseded will 22197be8da07SAndreas Gruenbacher * disappear from the write_requests tree. 22207be8da07SAndreas Gruenbacher * 22217be8da07SAndreas Gruenbacher * In addition, wait for the conflicting 22227be8da07SAndreas Gruenbacher * request to finish locally before submitting 22237be8da07SAndreas Gruenbacher * the conflicting peer request. 
22247be8da07SAndreas Gruenbacher */ 2225b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, &req->i); 22267be8da07SAndreas Gruenbacher if (err) { 2227e33b32deSAndreas Gruenbacher _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 2228b30ab791SAndreas Gruenbacher fail_postponed_requests(device, sector, size); 22297be8da07SAndreas Gruenbacher goto out; 22307be8da07SAndreas Gruenbacher } 22317be8da07SAndreas Gruenbacher goto repeat; 22327be8da07SAndreas Gruenbacher } 22337be8da07SAndreas Gruenbacher /* 22347be8da07SAndreas Gruenbacher * Remember to restart the conflicting requests after 22357be8da07SAndreas Gruenbacher * the new peer request has completed. 22367be8da07SAndreas Gruenbacher */ 22377be8da07SAndreas Gruenbacher peer_req->flags |= EE_RESTART_REQUESTS; 22387be8da07SAndreas Gruenbacher } 22397be8da07SAndreas Gruenbacher } 22407be8da07SAndreas Gruenbacher err = 0; 22417be8da07SAndreas Gruenbacher 22427be8da07SAndreas Gruenbacher out: 22437be8da07SAndreas Gruenbacher if (err) 2244b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 22457be8da07SAndreas Gruenbacher return err; 22467be8da07SAndreas Gruenbacher } 22477be8da07SAndreas Gruenbacher 2248b411b363SPhilipp Reisner /* mirrored write */ 2249bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi) 2250b411b363SPhilipp Reisner { 22519f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2252b30ab791SAndreas Gruenbacher struct drbd_device *device; 2253b411b363SPhilipp Reisner sector_t sector; 2254db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 2255e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 22567be8da07SAndreas Gruenbacher u32 peer_seq = be32_to_cpu(p->seq_num); 2257b411b363SPhilipp Reisner int rw = WRITE; 2258b411b363SPhilipp Reisner u32 dp_flags; 2259302bdeaeSPhilipp Reisner int err, tp; 22607be8da07SAndreas Gruenbacher 22619f4fe9adSAndreas Gruenbacher peer_device 
= conn_peer_device(connection, pi->vnr); 22629f4fe9adSAndreas Gruenbacher if (!peer_device) 22634a76b161SAndreas Gruenbacher return -EIO; 22649f4fe9adSAndreas Gruenbacher device = peer_device->device; 2265b411b363SPhilipp Reisner 2266b30ab791SAndreas Gruenbacher if (!get_ldev(device)) { 226782bc0194SAndreas Gruenbacher int err2; 2268b411b363SPhilipp Reisner 226969a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 227069a22773SAndreas Gruenbacher drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 2271bde89a9eSAndreas Gruenbacher atomic_inc(&connection->current_epoch->epoch_size); 227269a22773SAndreas Gruenbacher err2 = drbd_drain_block(peer_device, pi->size); 227382bc0194SAndreas Gruenbacher if (!err) 227482bc0194SAndreas Gruenbacher err = err2; 227582bc0194SAndreas Gruenbacher return err; 2276b411b363SPhilipp Reisner } 2277b411b363SPhilipp Reisner 2278fcefa62eSAndreas Gruenbacher /* 2279fcefa62eSAndreas Gruenbacher * Corresponding put_ldev done either below (on various errors), or in 2280fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio, if we successfully submit the data at the 2281fcefa62eSAndreas Gruenbacher * end of this function. 
2282fcefa62eSAndreas Gruenbacher */ 2283b411b363SPhilipp Reisner 2284b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 2285a0fb3c47SLars Ellenberg peer_req = read_in_block(peer_device, p->block_id, sector, pi); 2286db830c46SAndreas Gruenbacher if (!peer_req) { 2287b30ab791SAndreas Gruenbacher put_ldev(device); 228882bc0194SAndreas Gruenbacher return -EIO; 2289b411b363SPhilipp Reisner } 2290b411b363SPhilipp Reisner 2291a8cd15baSAndreas Gruenbacher peer_req->w.cb = e_end_block; 2292b411b363SPhilipp Reisner 2293688593c5SLars Ellenberg dp_flags = be32_to_cpu(p->dp_flags); 229481f0ffd2SAndreas Gruenbacher rw |= wire_flags_to_bio(dp_flags); 2295a0fb3c47SLars Ellenberg if (pi->cmd == P_TRIM) { 2296a0fb3c47SLars Ellenberg struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); 2297a0fb3c47SLars Ellenberg peer_req->flags |= EE_IS_TRIM; 2298a0fb3c47SLars Ellenberg if (!blk_queue_discard(q)) 2299a0fb3c47SLars Ellenberg peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; 2300a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->i.size > 0); 2301a0fb3c47SLars Ellenberg D_ASSERT(peer_device, rw & REQ_DISCARD); 2302a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->pages == NULL); 2303a0fb3c47SLars Ellenberg } else if (peer_req->pages == NULL) { 23040b0ba1efSAndreas Gruenbacher D_ASSERT(device, peer_req->i.size == 0); 23050b0ba1efSAndreas Gruenbacher D_ASSERT(device, dp_flags & DP_FLUSH); 2306a73ff323SLars Ellenberg } 2307688593c5SLars Ellenberg 2308688593c5SLars Ellenberg if (dp_flags & DP_MAY_SET_IN_SYNC) 2309db830c46SAndreas Gruenbacher peer_req->flags |= EE_MAY_SET_IN_SYNC; 2310688593c5SLars Ellenberg 2311bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock); 2312bde89a9eSAndreas Gruenbacher peer_req->epoch = connection->current_epoch; 2313db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->epoch_size); 2314db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->active); 2315bde89a9eSAndreas Gruenbacher 
spin_unlock(&connection->epoch_lock); 2316b411b363SPhilipp Reisner 2317302bdeaeSPhilipp Reisner rcu_read_lock(); 23189f4fe9adSAndreas Gruenbacher tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; 2319302bdeaeSPhilipp Reisner rcu_read_unlock(); 2320302bdeaeSPhilipp Reisner if (tp) { 2321302bdeaeSPhilipp Reisner peer_req->flags |= EE_IN_INTERVAL_TREE; 232269a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 23237be8da07SAndreas Gruenbacher if (err) 2324b411b363SPhilipp Reisner goto out_interrupted; 23250500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2326b30ab791SAndreas Gruenbacher err = handle_write_conflicts(device, peer_req); 23277be8da07SAndreas Gruenbacher if (err) { 23280500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 23297be8da07SAndreas Gruenbacher if (err == -ENOENT) { 2330b30ab791SAndreas Gruenbacher put_ldev(device); 233182bc0194SAndreas Gruenbacher return 0; 2332b411b363SPhilipp Reisner } 2333b411b363SPhilipp Reisner goto out_interrupted; 2334b411b363SPhilipp Reisner } 2335b874d231SPhilipp Reisner } else { 233669a22773SAndreas Gruenbacher update_peer_seq(peer_device, peer_seq); 23370500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2338b874d231SPhilipp Reisner } 2339a0fb3c47SLars Ellenberg /* if we use the zeroout fallback code, we process synchronously 2340a0fb3c47SLars Ellenberg * and we wait for all pending requests, respectively wait for 2341a0fb3c47SLars Ellenberg * active_ee to become empty in drbd_submit_peer_request(); 2342a0fb3c47SLars Ellenberg * better not add ourselves here. 
*/ 2343a0fb3c47SLars Ellenberg if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) 2344a8cd15baSAndreas Gruenbacher list_add(&peer_req->w.list, &device->active_ee); 23450500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2346b411b363SPhilipp Reisner 2347b30ab791SAndreas Gruenbacher if (device->state.conn == C_SYNC_TARGET) 2348b30ab791SAndreas Gruenbacher wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 2349b6a370baSPhilipp Reisner 23509f4fe9adSAndreas Gruenbacher if (peer_device->connection->agreed_pro_version < 100) { 235144ed167dSPhilipp Reisner rcu_read_lock(); 23529f4fe9adSAndreas Gruenbacher switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) { 2353b411b363SPhilipp Reisner case DRBD_PROT_C: 2354303d1448SPhilipp Reisner dp_flags |= DP_SEND_WRITE_ACK; 2355303d1448SPhilipp Reisner break; 2356303d1448SPhilipp Reisner case DRBD_PROT_B: 2357303d1448SPhilipp Reisner dp_flags |= DP_SEND_RECEIVE_ACK; 2358303d1448SPhilipp Reisner break; 2359303d1448SPhilipp Reisner } 236044ed167dSPhilipp Reisner rcu_read_unlock(); 2361303d1448SPhilipp Reisner } 2362303d1448SPhilipp Reisner 2363303d1448SPhilipp Reisner if (dp_flags & DP_SEND_WRITE_ACK) { 2364303d1448SPhilipp Reisner peer_req->flags |= EE_SEND_WRITE_ACK; 2365b30ab791SAndreas Gruenbacher inc_unacked(device); 2366b411b363SPhilipp Reisner /* corresponding dec_unacked() in e_end_block() 2367b411b363SPhilipp Reisner * respective _drbd_clear_done_ee */ 2368303d1448SPhilipp Reisner } 2369303d1448SPhilipp Reisner 2370303d1448SPhilipp Reisner if (dp_flags & DP_SEND_RECEIVE_ACK) { 2371b411b363SPhilipp Reisner /* I really don't like it that the receiver thread 2372b411b363SPhilipp Reisner * sends on the msock, but anyways */ 237369a22773SAndreas Gruenbacher drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); 2374b411b363SPhilipp Reisner } 2375b411b363SPhilipp Reisner 2376b30ab791SAndreas Gruenbacher if (device->state.pdsk < D_INCONSISTENT) { 
2377b411b363SPhilipp Reisner /* In case we have the only disk of the cluster, */ 2378b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 2379db830c46SAndreas Gruenbacher peer_req->flags |= EE_CALL_AL_COMPLETE_IO; 2380db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 23814dd726f0SLars Ellenberg drbd_al_begin_io(device, &peer_req->i); 2382b411b363SPhilipp Reisner } 2383b411b363SPhilipp Reisner 2384b30ab791SAndreas Gruenbacher err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); 238582bc0194SAndreas Gruenbacher if (!err) 238682bc0194SAndreas Gruenbacher return 0; 2387b411b363SPhilipp Reisner 238810f6d992SLars Ellenberg /* don't care for the reason here */ 2389d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 23900500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2391a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 2392b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 23930500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2394db830c46SAndreas Gruenbacher if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) 2395b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &peer_req->i); 239622cc37a9SLars Ellenberg 2397b411b363SPhilipp Reisner out_interrupted: 2398bde89a9eSAndreas Gruenbacher drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); 2399b30ab791SAndreas Gruenbacher put_ldev(device); 2400b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 240182bc0194SAndreas Gruenbacher return err; 2402b411b363SPhilipp Reisner } 2403b411b363SPhilipp Reisner 24040f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy, 24050f0601f4SLars Ellenberg * and current sync rate is above c_min_rate. 
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */

/* Decide whether resync IO touching @sector should be throttled.
 * Returns false right away when the c-min-rate throttle does not apply;
 * otherwise throttle, unless application IO is already waiting on the
 * resync extent covering @sector (BME_PRIORITY set on the extent). */
bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
	struct lc_element *tmp;
	bool throttle = true;

	if (!drbd_rs_c_min_rate_throttle(device))
		return false;

	/* al_lock protects the resync extent cache (device->resync) */
	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags))
			throttle = false;
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&device->al_lock);

	return throttle;
}

/* Return true when the backing device shows "significant" non-resync
 * activity and the short-term resync rate already exceeds the configured
 * c-min-rate (in KiB/s); both conditions together mean "slow down". */
bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	unsigned int c_min_rate;
	int curr_events;

	/* disk_conf is RCU-managed; take a snapshot of c_min_rate */
	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return false;

	/* "foreign" activity: total sectors read+written on the backing
	 * disk, minus the sectors we submitted for resync ourselves
	 * (accounted in rs_sect_ev) */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
		      atomic_read(&device->rs_sect_ev);
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* online verify tracks its own "left" counter */
		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;	/* avoid division by zero below */
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);	/* recent sync rate in KiB/s */

		if (dbdt > c_min_rate)
			return true;
	}
	return false;
}

/* Service a read-type request from the peer: P_DATA_REQUEST,
 * P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST, P_OV_REQUEST or P_OV_REPLY.
 * Validates sector/size against our capacity, allocates a peer request,
 * receives a digest payload where the packet carries one, throttles
 * resync reads via drbd_rs_should_slow_down(), and submits the local
 * read.  Returns 0 on success or a negative error code; a negative
 * return makes the caller drop the connection. */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* reject bogus sizes: non-positive, not 512-byte aligned, or too big */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* reject requests reaching beyond the end of our device */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: negatively acknowledge, then drain any
	 * payload still in the socket so the stream stays in sync. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Pick the completion callback and fault-injection type per packet. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* both carry a digest payload of pi->size bytes */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		/* digest bytes live directly behind the digest_info header */
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first P_OV_REQUEST of a verify run (with proto >= 90):
		 * initialize the online-verify bookkeeping */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time.  For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through.  The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	/* corresponding dec_unacked() happens in the completion callback */
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}

/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.  By the convention used by
 * the callers (and the legend above drbd_uuid_compare()), the return value
 * apparently means: 1 = keep local data (become sync source), -1 = discard
 * local data (become sync target), -100 = no automatic decision — TODO
 * confirm against the call sites.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;	/* -100: no automatic decision */
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* lowest bit of the bitmap UUID: "was primary at crash time" flag,
	 * presumably — verify against drbd_uuid_compare() */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* "changes": amount of out-of-sync data on each side */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	/* net_conf is RCU-managed; snapshot the policy */
	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies only make sense with a remaining primary */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: let the resolve-conflicts side win */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* otherwise (fallen through from above) continue below */
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}

/**
 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
 *
 * Delegates to drbd_asb_recover_0p() where the policy requires a 0-primary
 * style decision; -100 again means "no automatic decision".
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		/* 0-primary-only policies are invalid here */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* only accept the 0p decision if it does not demote anyone */
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1 && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				/* demotion failed: hand the problem to the helper script */
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

/**
 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
 *
 * Same return convention as drbd_asb_recover_0p(); -100 = no decision.
 */
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_2p;

	rcu_read_lock();
	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
	rcu_read_unlock();
	switch (after_sb_2p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_DISCARD_ZERO_CHG:
		/* these policies do not apply with two primaries */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

/* Log the four UUIDs plus bitmap-bits/flags for @text ("self" or "peer").
 * @uuid may have vanished concurrently; log and bail out in that case. */
static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	if (!uuid) {
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
		return;
	}
	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
		  text,
		  (unsigned long long)uuid[UI_CURRENT],
		  (unsigned long long)uuid[UI_BITMAP],
		  (unsigned long long)uuid[UI_HISTORY_START],
		  (unsigned long long)uuid[UI_HISTORY_END],
		  (unsigned long long)bits,
		  (unsigned long long)flags);
}

/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091   requires proto 91
-1096   requires proto 96
 */
static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ?
peer_device->connection : NULL; 2895b411b363SPhilipp Reisner u64 self, peer; 2896b411b363SPhilipp Reisner int i, j; 2897b411b363SPhilipp Reisner 2898b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); 2899b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 2900b411b363SPhilipp Reisner 2901b411b363SPhilipp Reisner *rule_nr = 10; 2902b411b363SPhilipp Reisner if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) 2903b411b363SPhilipp Reisner return 0; 2904b411b363SPhilipp Reisner 2905b411b363SPhilipp Reisner *rule_nr = 20; 2906b411b363SPhilipp Reisner if ((self == UUID_JUST_CREATED || self == (u64)0) && 2907b411b363SPhilipp Reisner peer != UUID_JUST_CREATED) 2908b411b363SPhilipp Reisner return -2; 2909b411b363SPhilipp Reisner 2910b411b363SPhilipp Reisner *rule_nr = 30; 2911b411b363SPhilipp Reisner if (self != UUID_JUST_CREATED && 2912b411b363SPhilipp Reisner (peer == UUID_JUST_CREATED || peer == (u64)0)) 2913b411b363SPhilipp Reisner return 2; 2914b411b363SPhilipp Reisner 2915b411b363SPhilipp Reisner if (self == peer) { 2916b411b363SPhilipp Reisner int rct, dc; /* roles at crash time */ 2917b411b363SPhilipp Reisner 2918b30ab791SAndreas Gruenbacher if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { 2919b411b363SPhilipp Reisner 292044a4d551SLars Ellenberg if (connection->agreed_pro_version < 91) 29214a23f264SPhilipp Reisner return -1091; 2922b411b363SPhilipp Reisner 2923b30ab791SAndreas Gruenbacher if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && 2924b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { 2925d0180171SAndreas Gruenbacher drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n"); 2926b30ab791SAndreas Gruenbacher drbd_uuid_move_history(device); 2927b30ab791SAndreas Gruenbacher 
device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP]; 2928b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_BITMAP] = 0; 2929b411b363SPhilipp Reisner 2930b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "self", device->ldev->md.uuid, 2931b30ab791SAndreas Gruenbacher device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0); 2932b411b363SPhilipp Reisner *rule_nr = 34; 2933b411b363SPhilipp Reisner } else { 2934d0180171SAndreas Gruenbacher drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n"); 2935b411b363SPhilipp Reisner *rule_nr = 36; 2936b411b363SPhilipp Reisner } 2937b411b363SPhilipp Reisner 2938b411b363SPhilipp Reisner return 1; 2939b411b363SPhilipp Reisner } 2940b411b363SPhilipp Reisner 2941b30ab791SAndreas Gruenbacher if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { 2942b411b363SPhilipp Reisner 294344a4d551SLars Ellenberg if (connection->agreed_pro_version < 91) 29444a23f264SPhilipp Reisner return -1091; 2945b411b363SPhilipp Reisner 2946b30ab791SAndreas Gruenbacher if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && 2947b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) { 2948d0180171SAndreas Gruenbacher drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n"); 2949b411b363SPhilipp Reisner 2950b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START]; 2951b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP]; 2952b30ab791SAndreas Gruenbacher device->p_uuid[UI_BITMAP] = 0UL; 2953b411b363SPhilipp Reisner 2954b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 2955b411b363SPhilipp Reisner *rule_nr = 35; 2956b411b363SPhilipp Reisner } else 
{ 2957d0180171SAndreas Gruenbacher drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n"); 2958b411b363SPhilipp Reisner *rule_nr = 37; 2959b411b363SPhilipp Reisner } 2960b411b363SPhilipp Reisner 2961b411b363SPhilipp Reisner return -1; 2962b411b363SPhilipp Reisner } 2963b411b363SPhilipp Reisner 2964b411b363SPhilipp Reisner /* Common power [off|failure] */ 2965b30ab791SAndreas Gruenbacher rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) + 2966b30ab791SAndreas Gruenbacher (device->p_uuid[UI_FLAGS] & 2); 2967b411b363SPhilipp Reisner /* lowest bit is set when we were primary, 2968b411b363SPhilipp Reisner * next bit (weight 2) is set when peer was primary */ 2969b411b363SPhilipp Reisner *rule_nr = 40; 2970b411b363SPhilipp Reisner 2971b411b363SPhilipp Reisner switch (rct) { 2972b411b363SPhilipp Reisner case 0: /* !self_pri && !peer_pri */ return 0; 2973b411b363SPhilipp Reisner case 1: /* self_pri && !peer_pri */ return 1; 2974b411b363SPhilipp Reisner case 2: /* !self_pri && peer_pri */ return -1; 2975b411b363SPhilipp Reisner case 3: /* self_pri && peer_pri */ 297644a4d551SLars Ellenberg dc = test_bit(RESOLVE_CONFLICTS, &connection->flags); 2977b411b363SPhilipp Reisner return dc ? -1 : 1; 2978b411b363SPhilipp Reisner } 2979b411b363SPhilipp Reisner } 2980b411b363SPhilipp Reisner 2981b411b363SPhilipp Reisner *rule_nr = 50; 2982b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_BITMAP] & ~((u64)1); 2983b411b363SPhilipp Reisner if (self == peer) 2984b411b363SPhilipp Reisner return -1; 2985b411b363SPhilipp Reisner 2986b411b363SPhilipp Reisner *rule_nr = 51; 2987b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); 2988b411b363SPhilipp Reisner if (self == peer) { 298944a4d551SLars Ellenberg if (connection->agreed_pro_version < 96 ? 
2990b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == 2991b30ab791SAndreas Gruenbacher (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : 2992b30ab791SAndreas Gruenbacher peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { 2993b411b363SPhilipp Reisner /* The last P_SYNC_UUID did not get though. Undo the last start of 2994b411b363SPhilipp Reisner resync as sync source modifications of the peer's UUIDs. */ 2995b411b363SPhilipp Reisner 299644a4d551SLars Ellenberg if (connection->agreed_pro_version < 91) 29974a23f264SPhilipp Reisner return -1091; 2998b411b363SPhilipp Reisner 2999b30ab791SAndreas Gruenbacher device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; 3000b30ab791SAndreas Gruenbacher device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1]; 30014a23f264SPhilipp Reisner 3002d0180171SAndreas Gruenbacher drbd_info(device, "Lost last syncUUID packet, corrected:\n"); 3003b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 30044a23f264SPhilipp Reisner 3005b411b363SPhilipp Reisner return -1; 3006b411b363SPhilipp Reisner } 3007b411b363SPhilipp Reisner } 3008b411b363SPhilipp Reisner 3009b411b363SPhilipp Reisner *rule_nr = 60; 3010b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1); 3011b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 3012b30ab791SAndreas Gruenbacher peer = device->p_uuid[i] & ~((u64)1); 3013b411b363SPhilipp Reisner if (self == peer) 3014b411b363SPhilipp Reisner return -2; 3015b411b363SPhilipp Reisner } 3016b411b363SPhilipp Reisner 3017b411b363SPhilipp Reisner *rule_nr = 70; 3018b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); 3019b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 3020b411b363SPhilipp Reisner if (self == peer) 3021b411b363SPhilipp Reisner return 1; 
3022b411b363SPhilipp Reisner 3023b411b363SPhilipp Reisner *rule_nr = 71; 3024b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 3025b411b363SPhilipp Reisner if (self == peer) { 302644a4d551SLars Ellenberg if (connection->agreed_pro_version < 96 ? 3027b30ab791SAndreas Gruenbacher (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == 3028b30ab791SAndreas Gruenbacher (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : 3029b30ab791SAndreas Gruenbacher self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { 3030b411b363SPhilipp Reisner /* The last P_SYNC_UUID did not get though. Undo the last start of 3031b411b363SPhilipp Reisner resync as sync source modifications of our UUIDs. */ 3032b411b363SPhilipp Reisner 303344a4d551SLars Ellenberg if (connection->agreed_pro_version < 91) 30344a23f264SPhilipp Reisner return -1091; 3035b411b363SPhilipp Reisner 3036b30ab791SAndreas Gruenbacher __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); 3037b30ab791SAndreas Gruenbacher __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]); 3038b411b363SPhilipp Reisner 3039d0180171SAndreas Gruenbacher drbd_info(device, "Last syncUUID did not get through, corrected:\n"); 3040b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "self", device->ldev->md.uuid, 3041b30ab791SAndreas Gruenbacher device->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(device) : 0, 0); 3042b411b363SPhilipp Reisner 3043b411b363SPhilipp Reisner return 1; 3044b411b363SPhilipp Reisner } 3045b411b363SPhilipp Reisner } 3046b411b363SPhilipp Reisner 3047b411b363SPhilipp Reisner 3048b411b363SPhilipp Reisner *rule_nr = 80; 3049b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_CURRENT] & ~((u64)1); 3050b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 3051b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[i] & ~((u64)1); 3052b411b363SPhilipp Reisner if (self == peer) 3053b411b363SPhilipp Reisner return 2; 3054b411b363SPhilipp Reisner } 3055b411b363SPhilipp Reisner 3056b411b363SPhilipp Reisner *rule_nr = 90; 3057b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1); 3058b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_BITMAP] & ~((u64)1); 3059b411b363SPhilipp Reisner if (self == peer && self != ((u64)0)) 3060b411b363SPhilipp Reisner return 100; 3061b411b363SPhilipp Reisner 3062b411b363SPhilipp Reisner *rule_nr = 100; 3063b411b363SPhilipp Reisner for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { 3064b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[i] & ~((u64)1); 3065b411b363SPhilipp Reisner for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { 3066b30ab791SAndreas Gruenbacher peer = device->p_uuid[j] & ~((u64)1); 3067b411b363SPhilipp Reisner if (self == peer) 3068b411b363SPhilipp Reisner return -100; 3069b411b363SPhilipp Reisner } 3070b411b363SPhilipp Reisner } 3071b411b363SPhilipp Reisner 3072b411b363SPhilipp Reisner return -1000; 3073b411b363SPhilipp Reisner } 3074b411b363SPhilipp Reisner 3075b411b363SPhilipp Reisner /* drbd_sync_handshake() returns the new conn state on success, or 3076b411b363SPhilipp Reisner CONN_MASK (-1) on failure. 
 */
/* Decide the connection state to enter after exchanging state with the peer:
 * compare UUIDs (under the uuid_lock), apply the configured after-split-brain
 * policies, and map the numeric verdict onto C_WF_BITMAP_S / C_WF_BITMAP_T /
 * C_CONNECTED -- or C_MASK to refuse and drop the connection.  Runs with the
 * local disk reference held (__must_hold(local)). */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* uuid_lock keeps the UUID set stable across dump + compare. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		/* -1091/-1096: the verdict needs a newer wire protocol. */
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* Exactly one side is still Inconsistent: disk states override the
	 * UUID verdict; the consistent side becomes the sync source. */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;	/* keep the "full sync" magnitude */
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	/* Split brain: try the automatic recovery policy matching the number
	 * of primaries (after-sb-0pri / 1pri / 2pri). */
	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Still unresolved: honor an operator's discard-my-data setting,
	 * carried in bit 0 of the peer's UI_FLAGS. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}
	/* Copy out what we still need before leaving the RCU read section. */
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		/* A primary with good data is asked to become SyncTarget:
		 * resolve per the rr-conflict setting. */
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run mode: report what would happen, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| == 2 means "set bitmap": mark everything dirty for a full sync. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(device));
		}
	}

	return rv;
}

/* Translate the peer's after-split-brain policy into our point of view:
 * "discard remote" on the peer's side is "discard local" on ours, and
 * vice versa; symmetric policies pass through unchanged. */
static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
{
	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
	if (peer == ASB_DISCARD_REMOTE)
		return ASB_DISCARD_LOCAL;

	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
	if (peer == ASB_DISCARD_LOCAL)
		return ASB_DISCARD_REMOTE;
	/* everything else is valid if they are equal on both sides. */
	return peer;
}

/* Handle an incoming P_PROTOCOL / P_PROTOCOL_UPDATE packet: verify that the
 * peer's protocol options match our net_conf (initial connect only, skipped
 * for P_PROTOCOL_UPDATE), adopt the peer's data-integrity algorithm, and
 * publish an updated net_conf via RCU.  Returns 0 on success or a negative
 * error; on any mismatch the connection is forced to C_DISCONNECTING. */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol 87 the integrity algorithm name trails the packet. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* force NUL termination regardless of what the peer sent */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		/* Compare the peer's settings against ours; any mismatch is a
		 * configuration error and we refuse the connection. */
		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* discard-my-data on both sides would discard everything */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* Publish the new net_conf: copy-modify-publish under the update
	 * mutexes, then RCU-swap the pointer so readers never see a torn
	 * structure.  Lock order: data.mutex before conf_update. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* Swap in the new integrity transform and digest buffers. */
	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* wait for all RCU readers of the old conf before freeing it */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* crypto_free_hash/kfree tolerate NULL, so partial setup is fine */
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok.
 */
static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	/* empty algorithm name means "feature disabled", not an error */
	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		/* log which feature wanted the algorithm; still return the
		 * ERR_PTR so the caller can distinguish the failure */
		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
			alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}

/* Drain and discard the remaining pi->size payload bytes of the current
 * packet from the data socket, reusing the connection's receive buffer.
 * Returns 0 when everything was consumed, a negative error from drbd_recv,
 * or -EIO if the stream ended early. */
static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		/* read in receive-buffer-sized chunks */
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;	/* s == 0: orderly shutdown mid-packet */
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}

/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* swallow the payload so the packet stream stays in sync */
	return ignore_remaining_packet(connection, pi);
}

/* Handle an incoming P_SYNC_PARAM family packet: parse the resync parameters
 * (layout depends on the agreed protocol version) and install new verify /
 * csums transforms and disk/net configuration as applicable. */
static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3468bde89a9eSAndreas Gruenbacher const int apv = connection->agreed_pro_version; 3469813472ceSPhilipp Reisner struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 3470778f271dSPhilipp Reisner int fifo_size = 0; 347182bc0194SAndreas Gruenbacher int err; 3472b411b363SPhilipp Reisner 34739f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 34749f4fe9adSAndreas Gruenbacher if (!peer_device) 3475bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 34769f4fe9adSAndreas Gruenbacher device = peer_device->device; 3477b411b363SPhilipp Reisner 3478b411b363SPhilipp Reisner exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 3479b411b363SPhilipp Reisner : apv == 88 ? sizeof(struct p_rs_param) 3480b411b363SPhilipp Reisner + SHARED_SECRET_MAX 34818e26f9ccSPhilipp Reisner : apv <= 94 ? sizeof(struct p_rs_param_89) 34828e26f9ccSPhilipp Reisner : /* apv >= 95 */ sizeof(struct p_rs_param_95); 3483b411b363SPhilipp Reisner 3484e2857216SAndreas Gruenbacher if (pi->size > exp_max_sz) { 3485d0180171SAndreas Gruenbacher drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n", 3486e2857216SAndreas Gruenbacher pi->size, exp_max_sz); 348782bc0194SAndreas Gruenbacher return -EIO; 3488b411b363SPhilipp Reisner } 3489b411b363SPhilipp Reisner 3490b411b363SPhilipp Reisner if (apv <= 88) { 3491e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param); 3492e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 34938e26f9ccSPhilipp Reisner } else if (apv <= 94) { 3494e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_89); 3495e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 34960b0ba1efSAndreas Gruenbacher D_ASSERT(device, data_size == 0); 34978e26f9ccSPhilipp Reisner } else { 3498e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_95); 3499e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 35000b0ba1efSAndreas Gruenbacher D_ASSERT(device, 
data_size == 0); 3501b411b363SPhilipp Reisner } 3502b411b363SPhilipp Reisner 3503b411b363SPhilipp Reisner /* initialize verify_alg and csums_alg */ 3504e658983aSAndreas Gruenbacher p = pi->data; 3505b411b363SPhilipp Reisner memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 3506b411b363SPhilipp Reisner 35079f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, header_size); 350882bc0194SAndreas Gruenbacher if (err) 350982bc0194SAndreas Gruenbacher return err; 3510b411b363SPhilipp Reisner 35110500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 35129f4fe9adSAndreas Gruenbacher old_net_conf = peer_device->connection->net_conf; 3513b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3514daeda1ccSPhilipp Reisner new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 3515daeda1ccSPhilipp Reisner if (!new_disk_conf) { 3516b30ab791SAndreas Gruenbacher put_ldev(device); 35170500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3518d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new disk_conf failed\n"); 3519daeda1ccSPhilipp Reisner return -ENOMEM; 3520f399002eSLars Ellenberg } 3521b411b363SPhilipp Reisner 3522b30ab791SAndreas Gruenbacher old_disk_conf = device->ldev->disk_conf; 3523daeda1ccSPhilipp Reisner *new_disk_conf = *old_disk_conf; 3524daeda1ccSPhilipp Reisner 35256394b935SAndreas Gruenbacher new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); 3526813472ceSPhilipp Reisner } 3527b411b363SPhilipp Reisner 3528b411b363SPhilipp Reisner if (apv >= 88) { 3529b411b363SPhilipp Reisner if (apv == 88) { 35305de73827SPhilipp Reisner if (data_size > SHARED_SECRET_MAX || data_size == 0) { 3531d0180171SAndreas Gruenbacher drbd_err(device, "verify-alg of wrong size, " 35325de73827SPhilipp Reisner "peer wants %u, accepting only up to %u byte\n", 3533b411b363SPhilipp Reisner data_size, SHARED_SECRET_MAX); 3534813472ceSPhilipp Reisner err = -EIO; 3535813472ceSPhilipp Reisner goto 
reconnect; 3536b411b363SPhilipp Reisner } 3537b411b363SPhilipp Reisner 35389f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size); 3539813472ceSPhilipp Reisner if (err) 3540813472ceSPhilipp Reisner goto reconnect; 3541b411b363SPhilipp Reisner /* we expect NUL terminated string */ 3542b411b363SPhilipp Reisner /* but just in case someone tries to be evil */ 35430b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[data_size-1] == 0); 3544b411b363SPhilipp Reisner p->verify_alg[data_size-1] = 0; 3545b411b363SPhilipp Reisner 3546b411b363SPhilipp Reisner } else /* apv >= 89 */ { 3547b411b363SPhilipp Reisner /* we still expect NUL terminated strings */ 3548b411b363SPhilipp Reisner /* but just in case someone tries to be evil */ 35490b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0); 35500b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0); 3551b411b363SPhilipp Reisner p->verify_alg[SHARED_SECRET_MAX-1] = 0; 3552b411b363SPhilipp Reisner p->csums_alg[SHARED_SECRET_MAX-1] = 0; 3553b411b363SPhilipp Reisner } 3554b411b363SPhilipp Reisner 35552ec91e0eSPhilipp Reisner if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { 3556b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) { 3557d0180171SAndreas Gruenbacher drbd_err(device, "Different verify-alg settings. 
me=\"%s\" peer=\"%s\"\n", 35582ec91e0eSPhilipp Reisner old_net_conf->verify_alg, p->verify_alg); 3559b411b363SPhilipp Reisner goto disconnect; 3560b411b363SPhilipp Reisner } 3561b30ab791SAndreas Gruenbacher verify_tfm = drbd_crypto_alloc_digest_safe(device, 3562b411b363SPhilipp Reisner p->verify_alg, "verify-alg"); 3563b411b363SPhilipp Reisner if (IS_ERR(verify_tfm)) { 3564b411b363SPhilipp Reisner verify_tfm = NULL; 3565b411b363SPhilipp Reisner goto disconnect; 3566b411b363SPhilipp Reisner } 3567b411b363SPhilipp Reisner } 3568b411b363SPhilipp Reisner 35692ec91e0eSPhilipp Reisner if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { 3570b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) { 3571d0180171SAndreas Gruenbacher drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", 35722ec91e0eSPhilipp Reisner old_net_conf->csums_alg, p->csums_alg); 3573b411b363SPhilipp Reisner goto disconnect; 3574b411b363SPhilipp Reisner } 3575b30ab791SAndreas Gruenbacher csums_tfm = drbd_crypto_alloc_digest_safe(device, 3576b411b363SPhilipp Reisner p->csums_alg, "csums-alg"); 3577b411b363SPhilipp Reisner if (IS_ERR(csums_tfm)) { 3578b411b363SPhilipp Reisner csums_tfm = NULL; 3579b411b363SPhilipp Reisner goto disconnect; 3580b411b363SPhilipp Reisner } 3581b411b363SPhilipp Reisner } 3582b411b363SPhilipp Reisner 3583813472ceSPhilipp Reisner if (apv > 94 && new_disk_conf) { 3584daeda1ccSPhilipp Reisner new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); 3585daeda1ccSPhilipp Reisner new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); 3586daeda1ccSPhilipp Reisner new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); 3587daeda1ccSPhilipp Reisner new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); 3588778f271dSPhilipp Reisner 3589daeda1ccSPhilipp Reisner fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; 3590b30ab791SAndreas Gruenbacher if (fifo_size != device->rs_plan_s->size) { 
3591813472ceSPhilipp Reisner new_plan = fifo_alloc(fifo_size); 3592813472ceSPhilipp Reisner if (!new_plan) { 3593d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of fifo_buffer failed"); 3594b30ab791SAndreas Gruenbacher put_ldev(device); 3595778f271dSPhilipp Reisner goto disconnect; 3596778f271dSPhilipp Reisner } 3597778f271dSPhilipp Reisner } 35988e26f9ccSPhilipp Reisner } 3599b411b363SPhilipp Reisner 360091fd4dadSPhilipp Reisner if (verify_tfm || csums_tfm) { 36012ec91e0eSPhilipp Reisner new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 36022ec91e0eSPhilipp Reisner if (!new_net_conf) { 3603d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new net_conf failed\n"); 360491fd4dadSPhilipp Reisner goto disconnect; 360591fd4dadSPhilipp Reisner } 360691fd4dadSPhilipp Reisner 36072ec91e0eSPhilipp Reisner *new_net_conf = *old_net_conf; 360891fd4dadSPhilipp Reisner 3609b411b363SPhilipp Reisner if (verify_tfm) { 36102ec91e0eSPhilipp Reisner strcpy(new_net_conf->verify_alg, p->verify_alg); 36112ec91e0eSPhilipp Reisner new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; 36129f4fe9adSAndreas Gruenbacher crypto_free_hash(peer_device->connection->verify_tfm); 36139f4fe9adSAndreas Gruenbacher peer_device->connection->verify_tfm = verify_tfm; 3614d0180171SAndreas Gruenbacher drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); 3615b411b363SPhilipp Reisner } 3616b411b363SPhilipp Reisner if (csums_tfm) { 36172ec91e0eSPhilipp Reisner strcpy(new_net_conf->csums_alg, p->csums_alg); 36182ec91e0eSPhilipp Reisner new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; 36199f4fe9adSAndreas Gruenbacher crypto_free_hash(peer_device->connection->csums_tfm); 36209f4fe9adSAndreas Gruenbacher peer_device->connection->csums_tfm = csums_tfm; 3621d0180171SAndreas Gruenbacher drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); 3622b411b363SPhilipp Reisner } 3623bde89a9eSAndreas Gruenbacher rcu_assign_pointer(connection->net_conf, new_net_conf); 
3624778f271dSPhilipp Reisner } 3625b411b363SPhilipp Reisner } 3626b411b363SPhilipp Reisner 3627813472ceSPhilipp Reisner if (new_disk_conf) { 3628b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 3629b30ab791SAndreas Gruenbacher put_ldev(device); 3630b411b363SPhilipp Reisner } 3631813472ceSPhilipp Reisner 3632813472ceSPhilipp Reisner if (new_plan) { 3633b30ab791SAndreas Gruenbacher old_plan = device->rs_plan_s; 3634b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->rs_plan_s, new_plan); 3635813472ceSPhilipp Reisner } 3636daeda1ccSPhilipp Reisner 36370500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3638daeda1ccSPhilipp Reisner synchronize_rcu(); 3639daeda1ccSPhilipp Reisner if (new_net_conf) 3640daeda1ccSPhilipp Reisner kfree(old_net_conf); 3641daeda1ccSPhilipp Reisner kfree(old_disk_conf); 3642813472ceSPhilipp Reisner kfree(old_plan); 3643daeda1ccSPhilipp Reisner 364482bc0194SAndreas Gruenbacher return 0; 3645b411b363SPhilipp Reisner 3646813472ceSPhilipp Reisner reconnect: 3647813472ceSPhilipp Reisner if (new_disk_conf) { 3648b30ab791SAndreas Gruenbacher put_ldev(device); 3649813472ceSPhilipp Reisner kfree(new_disk_conf); 3650813472ceSPhilipp Reisner } 36510500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3652813472ceSPhilipp Reisner return -EIO; 3653813472ceSPhilipp Reisner 3654b411b363SPhilipp Reisner disconnect: 3655813472ceSPhilipp Reisner kfree(new_plan); 3656813472ceSPhilipp Reisner if (new_disk_conf) { 3657b30ab791SAndreas Gruenbacher put_ldev(device); 3658813472ceSPhilipp Reisner kfree(new_disk_conf); 3659813472ceSPhilipp Reisner } 36600500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3661b411b363SPhilipp Reisner /* just for completeness: actually not needed, 3662b411b363SPhilipp Reisner * as this is not reached if csums_tfm was ok. 
*/ 3663b411b363SPhilipp Reisner crypto_free_hash(csums_tfm); 3664b411b363SPhilipp Reisner /* but free the verify_tfm again, if csums_tfm did not work out */ 3665b411b363SPhilipp Reisner crypto_free_hash(verify_tfm); 36669f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 366782bc0194SAndreas Gruenbacher return -EIO; 3668b411b363SPhilipp Reisner } 3669b411b363SPhilipp Reisner 3670b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */ 3671b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device, 3672b411b363SPhilipp Reisner const char *s, sector_t a, sector_t b) 3673b411b363SPhilipp Reisner { 3674b411b363SPhilipp Reisner sector_t d; 3675b411b363SPhilipp Reisner if (a == 0 || b == 0) 3676b411b363SPhilipp Reisner return; 3677b411b363SPhilipp Reisner d = (a > b) ? (a - b) : (b - a); 3678b411b363SPhilipp Reisner if (d > (a>>3) || d > (b>>3)) 3679d0180171SAndreas Gruenbacher drbd_warn(device, "Considerable difference in %s: %llus vs. 
%llus\n", s, 3680b411b363SPhilipp Reisner (unsigned long long)a, (unsigned long long)b); 3681b411b363SPhilipp Reisner } 3682b411b363SPhilipp Reisner 3683bde89a9eSAndreas Gruenbacher static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi) 3684b411b363SPhilipp Reisner { 36859f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 3686b30ab791SAndreas Gruenbacher struct drbd_device *device; 3687e658983aSAndreas Gruenbacher struct p_sizes *p = pi->data; 3688e96c9633SPhilipp Reisner enum determine_dev_size dd = DS_UNCHANGED; 36896a8d68b1SLars Ellenberg sector_t p_size, p_usize, p_csize, my_usize; 3690b411b363SPhilipp Reisner int ldsc = 0; /* local disk size changed */ 3691e89b591cSPhilipp Reisner enum dds_flags ddsf; 3692b411b363SPhilipp Reisner 36939f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 36949f4fe9adSAndreas Gruenbacher if (!peer_device) 3695bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 36969f4fe9adSAndreas Gruenbacher device = peer_device->device; 36974a76b161SAndreas Gruenbacher 3698b411b363SPhilipp Reisner p_size = be64_to_cpu(p->d_size); 3699b411b363SPhilipp Reisner p_usize = be64_to_cpu(p->u_size); 37006a8d68b1SLars Ellenberg p_csize = be64_to_cpu(p->c_size); 3701b411b363SPhilipp Reisner 3702b411b363SPhilipp Reisner /* just store the peer's disk size for now. 3703b411b363SPhilipp Reisner * we still need to figure out whether we accept that. 
*/ 3704b30ab791SAndreas Gruenbacher device->p_size = p_size; 3705b411b363SPhilipp Reisner 3706b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3707daeda1ccSPhilipp Reisner rcu_read_lock(); 3708b30ab791SAndreas Gruenbacher my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 3709daeda1ccSPhilipp Reisner rcu_read_unlock(); 3710daeda1ccSPhilipp Reisner 3711b30ab791SAndreas Gruenbacher warn_if_differ_considerably(device, "lower level device sizes", 3712b30ab791SAndreas Gruenbacher p_size, drbd_get_max_capacity(device->ldev)); 3713b30ab791SAndreas Gruenbacher warn_if_differ_considerably(device, "user requested size", 3714daeda1ccSPhilipp Reisner p_usize, my_usize); 3715b411b363SPhilipp Reisner 3716b411b363SPhilipp Reisner /* if this is the first connect, or an otherwise expected 3717b411b363SPhilipp Reisner * param exchange, choose the minimum */ 3718b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) 3719daeda1ccSPhilipp Reisner p_usize = min_not_zero(my_usize, p_usize); 3720b411b363SPhilipp Reisner 3721b411b363SPhilipp Reisner /* Never shrink a device with usable data during connect. 3722b411b363SPhilipp Reisner But allow online shrinking if we are connected. 
*/ 3723b30ab791SAndreas Gruenbacher if (drbd_new_dev_size(device, device->ldev, p_usize, 0) < 3724b30ab791SAndreas Gruenbacher drbd_get_capacity(device->this_bdev) && 3725b30ab791SAndreas Gruenbacher device->state.disk >= D_OUTDATED && 3726b30ab791SAndreas Gruenbacher device->state.conn < C_CONNECTED) { 3727d0180171SAndreas Gruenbacher drbd_err(device, "The peer's disk size is too small!\n"); 37289f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 3729b30ab791SAndreas Gruenbacher put_ldev(device); 373082bc0194SAndreas Gruenbacher return -EIO; 3731b411b363SPhilipp Reisner } 3732daeda1ccSPhilipp Reisner 3733daeda1ccSPhilipp Reisner if (my_usize != p_usize) { 3734daeda1ccSPhilipp Reisner struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 3735daeda1ccSPhilipp Reisner 3736daeda1ccSPhilipp Reisner new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 3737daeda1ccSPhilipp Reisner if (!new_disk_conf) { 3738d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new disk_conf failed\n"); 3739b30ab791SAndreas Gruenbacher put_ldev(device); 3740daeda1ccSPhilipp Reisner return -ENOMEM; 3741daeda1ccSPhilipp Reisner } 3742daeda1ccSPhilipp Reisner 37430500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 3744b30ab791SAndreas Gruenbacher old_disk_conf = device->ldev->disk_conf; 3745daeda1ccSPhilipp Reisner *new_disk_conf = *old_disk_conf; 3746daeda1ccSPhilipp Reisner new_disk_conf->disk_size = p_usize; 3747daeda1ccSPhilipp Reisner 3748b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 37490500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3750daeda1ccSPhilipp Reisner synchronize_rcu(); 3751daeda1ccSPhilipp Reisner kfree(old_disk_conf); 3752daeda1ccSPhilipp Reisner 3753d0180171SAndreas Gruenbacher drbd_info(device, "Peer sets u_size to %lu sectors\n", 3754daeda1ccSPhilipp Reisner (unsigned long)my_usize); 
3755daeda1ccSPhilipp Reisner } 3756daeda1ccSPhilipp Reisner 3757b30ab791SAndreas Gruenbacher put_ldev(device); 3758b411b363SPhilipp Reisner } 3759b411b363SPhilipp Reisner 376020c68fdeSLars Ellenberg device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); 376120c68fdeSLars Ellenberg /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). 376220c68fdeSLars Ellenberg In case we cleared the QUEUE_FLAG_DISCARD from our queue in 376320c68fdeSLars Ellenberg drbd_reconsider_max_bio_size(), we can be sure that after 376420c68fdeSLars Ellenberg drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ 376520c68fdeSLars Ellenberg 3766e89b591cSPhilipp Reisner ddsf = be16_to_cpu(p->dds_flags); 3767b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 37688fe39aacSPhilipp Reisner drbd_reconsider_max_bio_size(device, device->ldev); 3769b30ab791SAndreas Gruenbacher dd = drbd_determine_dev_size(device, ddsf, NULL); 3770b30ab791SAndreas Gruenbacher put_ldev(device); 3771e96c9633SPhilipp Reisner if (dd == DS_ERROR) 377282bc0194SAndreas Gruenbacher return -EIO; 3773b30ab791SAndreas Gruenbacher drbd_md_sync(device); 3774b411b363SPhilipp Reisner } else { 37756a8d68b1SLars Ellenberg /* 37766a8d68b1SLars Ellenberg * I am diskless, need to accept the peer's *current* size. 37776a8d68b1SLars Ellenberg * I must NOT accept the peers backing disk size, 37786a8d68b1SLars Ellenberg * it may have been larger than mine all along... 37796a8d68b1SLars Ellenberg * 37806a8d68b1SLars Ellenberg * At this point, the peer knows more about my disk, or at 37816a8d68b1SLars Ellenberg * least about what we last agreed upon, than myself. 37826a8d68b1SLars Ellenberg * So if his c_size is less than his d_size, the most likely 37836a8d68b1SLars Ellenberg * reason is that *my* d_size was smaller last time we checked. 
37846a8d68b1SLars Ellenberg * 37856a8d68b1SLars Ellenberg * However, if he sends a zero current size, 37866a8d68b1SLars Ellenberg * take his (user-capped or) backing disk size anyways. 37876a8d68b1SLars Ellenberg */ 37888fe39aacSPhilipp Reisner drbd_reconsider_max_bio_size(device, NULL); 37896a8d68b1SLars Ellenberg drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); 3790b411b363SPhilipp Reisner } 3791b411b363SPhilipp Reisner 3792b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3793b30ab791SAndreas Gruenbacher if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) { 3794b30ab791SAndreas Gruenbacher device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 3795b411b363SPhilipp Reisner ldsc = 1; 3796b411b363SPhilipp Reisner } 3797b411b363SPhilipp Reisner 3798b30ab791SAndreas Gruenbacher put_ldev(device); 3799b411b363SPhilipp Reisner } 3800b411b363SPhilipp Reisner 3801b30ab791SAndreas Gruenbacher if (device->state.conn > C_WF_REPORT_PARAMS) { 3802b411b363SPhilipp Reisner if (be64_to_cpu(p->c_size) != 3803b30ab791SAndreas Gruenbacher drbd_get_capacity(device->this_bdev) || ldsc) { 3804b411b363SPhilipp Reisner /* we have different sizes, probably peer 3805b411b363SPhilipp Reisner * needs to know my new size... 
*/ 380669a22773SAndreas Gruenbacher drbd_send_sizes(peer_device, 0, ddsf); 3807b411b363SPhilipp Reisner } 3808b30ab791SAndreas Gruenbacher if (test_and_clear_bit(RESIZE_PENDING, &device->flags) || 3809b30ab791SAndreas Gruenbacher (dd == DS_GREW && device->state.conn == C_CONNECTED)) { 3810b30ab791SAndreas Gruenbacher if (device->state.pdsk >= D_INCONSISTENT && 3811b30ab791SAndreas Gruenbacher device->state.disk >= D_INCONSISTENT) { 3812e89b591cSPhilipp Reisner if (ddsf & DDSF_NO_RESYNC) 3813d0180171SAndreas Gruenbacher drbd_info(device, "Resync of new storage suppressed with --assume-clean\n"); 3814b411b363SPhilipp Reisner else 3815b30ab791SAndreas Gruenbacher resync_after_online_grow(device); 3816e89b591cSPhilipp Reisner } else 3817b30ab791SAndreas Gruenbacher set_bit(RESYNC_AFTER_NEG, &device->flags); 3818b411b363SPhilipp Reisner } 3819b411b363SPhilipp Reisner } 3820b411b363SPhilipp Reisner 382182bc0194SAndreas Gruenbacher return 0; 3822b411b363SPhilipp Reisner } 3823b411b363SPhilipp Reisner 3824bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi) 3825b411b363SPhilipp Reisner { 38269f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 3827b30ab791SAndreas Gruenbacher struct drbd_device *device; 3828e658983aSAndreas Gruenbacher struct p_uuids *p = pi->data; 3829b411b363SPhilipp Reisner u64 *p_uuid; 383062b0da3aSLars Ellenberg int i, updated_uuids = 0; 3831b411b363SPhilipp Reisner 38329f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 38339f4fe9adSAndreas Gruenbacher if (!peer_device) 3834bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 38359f4fe9adSAndreas Gruenbacher device = peer_device->device; 38364a76b161SAndreas Gruenbacher 3837b411b363SPhilipp Reisner p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); 3838063eacf8SJing Wang if (!p_uuid) { 3839d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of p_uuid failed\n"); 
3840063eacf8SJing Wang return false; 3841063eacf8SJing Wang } 3842b411b363SPhilipp Reisner 3843b411b363SPhilipp Reisner for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 3844b411b363SPhilipp Reisner p_uuid[i] = be64_to_cpu(p->uuid[i]); 3845b411b363SPhilipp Reisner 3846b30ab791SAndreas Gruenbacher kfree(device->p_uuid); 3847b30ab791SAndreas Gruenbacher device->p_uuid = p_uuid; 3848b411b363SPhilipp Reisner 3849b30ab791SAndreas Gruenbacher if (device->state.conn < C_CONNECTED && 3850b30ab791SAndreas Gruenbacher device->state.disk < D_INCONSISTENT && 3851b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY && 3852b30ab791SAndreas Gruenbacher (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 3853d0180171SAndreas Gruenbacher drbd_err(device, "Can only connect to data with current UUID=%016llX\n", 3854b30ab791SAndreas Gruenbacher (unsigned long long)device->ed_uuid); 38559f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 385682bc0194SAndreas Gruenbacher return -EIO; 3857b411b363SPhilipp Reisner } 3858b411b363SPhilipp Reisner 3859b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3860b411b363SPhilipp Reisner int skip_initial_sync = 3861b30ab791SAndreas Gruenbacher device->state.conn == C_CONNECTED && 38629f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90 && 3863b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 3864b411b363SPhilipp Reisner (p_uuid[UI_FLAGS] & 8); 3865b411b363SPhilipp Reisner if (skip_initial_sync) { 3866d0180171SAndreas Gruenbacher drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); 3867b30ab791SAndreas Gruenbacher drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 386820ceb2b2SLars Ellenberg "clear_n_write from receive_uuids", 386920ceb2b2SLars Ellenberg BM_LOCKED_TEST_ALLOWED); 3870b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); 
3871b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0); 3872b30ab791SAndreas Gruenbacher _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3873b411b363SPhilipp Reisner CS_VERBOSE, NULL); 3874b30ab791SAndreas Gruenbacher drbd_md_sync(device); 387562b0da3aSLars Ellenberg updated_uuids = 1; 3876b411b363SPhilipp Reisner } 3877b30ab791SAndreas Gruenbacher put_ldev(device); 3878b30ab791SAndreas Gruenbacher } else if (device->state.disk < D_INCONSISTENT && 3879b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY) { 388018a50fa2SPhilipp Reisner /* I am a diskless primary, the peer just created a new current UUID 388118a50fa2SPhilipp Reisner for me. */ 3882b30ab791SAndreas Gruenbacher updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 3883b411b363SPhilipp Reisner } 3884b411b363SPhilipp Reisner 3885b411b363SPhilipp Reisner /* Before we test for the disk state, we should wait until an eventually 3886b411b363SPhilipp Reisner ongoing cluster wide state change is finished. That is important if 3887b411b363SPhilipp Reisner we are primary and are detaching from our disk. We need to see the 3888b411b363SPhilipp Reisner new disk state... 
*/ 3889b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 3890b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 3891b30ab791SAndreas Gruenbacher if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) 3892b30ab791SAndreas Gruenbacher updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 389362b0da3aSLars Ellenberg 389462b0da3aSLars Ellenberg if (updated_uuids) 3895b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "receiver updated UUIDs to"); 3896b411b363SPhilipp Reisner 389782bc0194SAndreas Gruenbacher return 0; 3898b411b363SPhilipp Reisner } 3899b411b363SPhilipp Reisner 3900b411b363SPhilipp Reisner /** 3901b411b363SPhilipp Reisner * convert_state() - Converts the peer's view of the cluster state to our point of view 3902b411b363SPhilipp Reisner * @ps: The state as seen by the peer. 3903b411b363SPhilipp Reisner */ 3904b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps) 3905b411b363SPhilipp Reisner { 3906b411b363SPhilipp Reisner union drbd_state ms; 3907b411b363SPhilipp Reisner 3908b411b363SPhilipp Reisner static enum drbd_conns c_tab[] = { 3909369bea63SPhilipp Reisner [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, 3910b411b363SPhilipp Reisner [C_CONNECTED] = C_CONNECTED, 3911b411b363SPhilipp Reisner 3912b411b363SPhilipp Reisner [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 3913b411b363SPhilipp Reisner [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, 3914b411b363SPhilipp Reisner [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ 3915b411b363SPhilipp Reisner [C_VERIFY_S] = C_VERIFY_T, 3916b411b363SPhilipp Reisner [C_MASK] = C_MASK, 3917b411b363SPhilipp Reisner }; 3918b411b363SPhilipp Reisner 3919b411b363SPhilipp Reisner ms.i = ps.i; 3920b411b363SPhilipp Reisner 3921b411b363SPhilipp Reisner ms.conn = c_tab[ps.conn]; 3922b411b363SPhilipp Reisner ms.peer = ps.role; 3923b411b363SPhilipp Reisner ms.role = ps.peer; 3924b411b363SPhilipp Reisner ms.pdsk = ps.disk; 3925b411b363SPhilipp Reisner 
ms.disk = ps.pdsk; 3926b411b363SPhilipp Reisner ms.peer_isp = (ps.aftr_isp | ps.user_isp); 3927b411b363SPhilipp Reisner 3928b411b363SPhilipp Reisner return ms; 3929b411b363SPhilipp Reisner } 3930b411b363SPhilipp Reisner 3931bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) 3932b411b363SPhilipp Reisner { 39339f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 3934b30ab791SAndreas Gruenbacher struct drbd_device *device; 3935e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data; 3936b411b363SPhilipp Reisner union drbd_state mask, val; 3937bf885f8aSAndreas Gruenbacher enum drbd_state_rv rv; 3938b411b363SPhilipp Reisner 39399f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 39409f4fe9adSAndreas Gruenbacher if (!peer_device) 39414a76b161SAndreas Gruenbacher return -EIO; 39429f4fe9adSAndreas Gruenbacher device = peer_device->device; 39434a76b161SAndreas Gruenbacher 3944b411b363SPhilipp Reisner mask.i = be32_to_cpu(p->mask); 3945b411b363SPhilipp Reisner val.i = be32_to_cpu(p->val); 3946b411b363SPhilipp Reisner 39479f4fe9adSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && 3948b30ab791SAndreas Gruenbacher mutex_is_locked(device->state_mutex)) { 394969a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); 395082bc0194SAndreas Gruenbacher return 0; 3951b411b363SPhilipp Reisner } 3952b411b363SPhilipp Reisner 3953b411b363SPhilipp Reisner mask = convert_state(mask); 3954b411b363SPhilipp Reisner val = convert_state(val); 3955b411b363SPhilipp Reisner 3956b30ab791SAndreas Gruenbacher rv = drbd_change_state(device, CS_VERBOSE, mask, val); 395769a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, rv); 3958047cd4a6SPhilipp Reisner 3959b30ab791SAndreas Gruenbacher drbd_md_sync(device); 3960b411b363SPhilipp Reisner 396182bc0194SAndreas Gruenbacher return 0; 3962b411b363SPhilipp Reisner } 

/* Handle P_CONN_ST_CHG_REQ: the peer asks for a connection-wide state change.
 * We apply it locally (CS_LOCAL_ONLY) and report the result back with
 * conn_send_sr_reply(). */
static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the side that resolves conflicts and a local cluster-state
	 * change is already in flight (cstate_mutex held), refuse the peer's
	 * concurrent request instead of racing with our own. */
	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* Translate the peer's view of the state (role/peer, disk/pdsk, ...)
	 * into our own before applying it. */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}

/* Handle P_STATE: fold the peer's reported state into our own state.
 * This is where resync decisions are (re)made when a connection is
 * established or a disk is attached; it may also conclude a finished
 * resync or verify run.  Returns 0 on success, negative error to drop
 * the connection. */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* Peer is still negotiating its disk state; derive the
		 * effective state from the UUID flags it sent us earlier. */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* Snapshot the current state; if it changes while we compute the new
	 * state below, we jump back here (see the os.i comparison further down). */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* Decide whether a resync handshake is called for.  Only possible if
	 * we have peer UUIDs and can still reach the local disk. */
	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake() signals "no usable common
		 * history" — recover if a side was still negotiating, else
		 * disconnect. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* State changed under us while we were computing ns: redo. */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}

/* Handle P_SYNC_UUID: the peer (SyncSource) tells us the UUID to adopt for
 * the resync we are about to start as SyncTarget. */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Wait until we are in a state where acting on the sync UUID makes
	 * sense (or the connection/disk is going away). */
	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

/**
 * receive_bitmap_plain
 *
 * Receive one uncompressed (P_BITMAP) chunk of the peer's bitmap and merge
 * it into ours at the current transfer offset tracked in @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	/* Each packet carries at most one socket buffer minus header. */
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	/* The sender must agree with us on the chunk size. */
	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	/* Advance the transfer cursor; clamp the bit offset to the bitmap end. */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}

/* Decode fields packed into the p_compressed_bm encoding byte:
 * low nibble = bitmap code, bit 7 = start-toggle value, bits 4-6 = pad bits. */
static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}

/**
 * recv_bm_rle_bits
 *
 * Decode one VLI/RLE compressed bitmap chunk: alternating run lengths of
 * clear/set bits, starting with the toggle value from the encoding byte,
 * and set the corresponding bit ranges in our bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* Prime a 64-bit look-ahead window from the bitstream. */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		/* Only "set" runs need action; "clear" runs just advance s. */
		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* A code longer than the remaining window means corrupt input. */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* Refill the look-ahead window back up to 64 bits. */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* Done only when we decoded exactly up to the bitmap end. */
	return (s != c->bm_bits);
}

/**
 * decode_bitmap_c
 *
 * Dispatch on the compression code of a P_COMPRESSED_BITMAP packet.
 * Only RLE_VLI_Bits is supported; anything else is a protocol error.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests.
	 */

	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

/* Log compression statistics for a completed bitmap transfer, but only when
 * the compressed transfer actually beat the plain-text size. */
void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total can not be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	/* r is now "permille saved" (0..1000). */
	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
	     "total %u; compression: %u.%u%%\n",
			direction,
			c->bytes[1], c->packets[1],
			c->bytes[0], c->packets[0],
			total, r/10, r % 10);
}

/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* One iteration per received packet until the whole bitmap arrived. */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* Account bytes/packets per flavor: index 1 = plain, 0 = RLE. */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		/* err: 1 = more chunks expected, 0 = done, <0 = failure. */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* As SyncTarget, reply with our bitmap before moving on. */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	/* As SyncSource we can kick off the resync now that both sides
	 * exchanged their bitmaps. */
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

/* Handler for optional packet types we do not understand: log and drain the
 * payload so the stream stays in sync. */
static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		 pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int
receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

/* Handle P_OUT_OF_SYNC: the peer marks a block range out of sync on our
 * side (expected only while we are still catching up: WFSyncUUID,
 * WFBitMapT or Behind). */
static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		/* Unexpected state: complain, but still record the range. */
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}

/* Dispatch-table entry for one data-socket packet type. */
struct data_cmd {
	int expect_payload;	/* nonzero: payload beyond pkt_size is allowed */
	size_t pkt_size;	/* fixed (sub-)header size to read up front */
	int (*fn)(struct drbd_connection *, struct packet_info *);
};

/* Packet-type -> handler table for the data socket, indexed by command. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};

/* Main receive loop of the receiver thread: read a packet header, look up the
 * handler in drbd_cmd_handler[], read the fixed sub-header, and dispatch.
 * Any error forces the connection into C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_connection *connection)
{
	struct packet_info pi;
	size_t shs; /* sub header size */
	int err;

	while (get_t_state(&connection->receiver) == RUNNING) {
		struct data_cmd *cmd;

		drbd_thread_current_set_cpu(&connection->receiver);
		if (drbd_recv_header(connection, &pi))
			goto err_out;

		cmd = &drbd_cmd_handler[pi.cmd];
		/* Reject commands outside the table or without a handler;
		 * the || short-circuits, so cmd->fn is only read in range. */
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
				 cmdname(pi.cmd), pi.cmd);
			goto err_out;
		}

		shs = cmd->pkt_size;
		if (pi.size > shs && !cmd->expect_payload) {
Gruenbacher drbd_err(connection, "No payload expected %s l:%d\n", 45782fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.size); 4579c13f7e1aSLars Ellenberg goto err_out; 4580c13f7e1aSLars Ellenberg } 4581c13f7e1aSLars Ellenberg 4582c13f7e1aSLars Ellenberg if (shs) { 4583bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, pi.data, shs); 4584a5c31904SAndreas Gruenbacher if (err) 458502918be2SPhilipp Reisner goto err_out; 4586e2857216SAndreas Gruenbacher pi.size -= shs; 4587b411b363SPhilipp Reisner } 458802918be2SPhilipp Reisner 4589bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi); 45904a76b161SAndreas Gruenbacher if (err) { 45911ec861ebSAndreas Gruenbacher drbd_err(connection, "error receiving %s, e: %d l: %d!\n", 45929f5bdc33SAndreas Gruenbacher cmdname(pi.cmd), err, pi.size); 459302918be2SPhilipp Reisner goto err_out; 459402918be2SPhilipp Reisner } 459502918be2SPhilipp Reisner } 459682bc0194SAndreas Gruenbacher return; 459702918be2SPhilipp Reisner 459802918be2SPhilipp Reisner err_out: 4599bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 4600b411b363SPhilipp Reisner } 4601b411b363SPhilipp Reisner 4602bde89a9eSAndreas Gruenbacher static void conn_disconnect(struct drbd_connection *connection) 4603f70b3511SPhilipp Reisner { 4604c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4605bbeb641cSPhilipp Reisner enum drbd_conns oc; 4606376694a0SPhilipp Reisner int vnr; 4607f70b3511SPhilipp Reisner 4608bde89a9eSAndreas Gruenbacher if (connection->cstate == C_STANDALONE) 4609b411b363SPhilipp Reisner return; 4610b411b363SPhilipp Reisner 4611545752d5SLars Ellenberg /* We are about to start the cleanup after connection loss. 4612545752d5SLars Ellenberg * Make sure drbd_make_request knows about that. 4613545752d5SLars Ellenberg * Usually we should be in some network failure state already, 4614545752d5SLars Ellenberg * but just in case we are not, we fix it up here. 
4615545752d5SLars Ellenberg */ 4616bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 4617545752d5SLars Ellenberg 4618b411b363SPhilipp Reisner /* asender does not clean up anything. it must not interfere, either */ 4619bde89a9eSAndreas Gruenbacher drbd_thread_stop(&connection->asender); 4620bde89a9eSAndreas Gruenbacher drbd_free_sock(connection); 4621360cc740SPhilipp Reisner 4622c141ebdaSPhilipp Reisner rcu_read_lock(); 4623c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 4624c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 4625b30ab791SAndreas Gruenbacher kref_get(&device->kref); 4626c141ebdaSPhilipp Reisner rcu_read_unlock(); 462769a22773SAndreas Gruenbacher drbd_disconnected(peer_device); 4628c06ece6bSAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 4629c141ebdaSPhilipp Reisner rcu_read_lock(); 4630c141ebdaSPhilipp Reisner } 4631c141ebdaSPhilipp Reisner rcu_read_unlock(); 4632c141ebdaSPhilipp Reisner 4633bde89a9eSAndreas Gruenbacher if (!list_empty(&connection->current_epoch->list)) 46341ec861ebSAndreas Gruenbacher drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n"); 463512038a3aSPhilipp Reisner /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ 4636bde89a9eSAndreas Gruenbacher atomic_set(&connection->current_epoch->epoch_size, 0); 4637bde89a9eSAndreas Gruenbacher connection->send.seen_any_write_yet = false; 463812038a3aSPhilipp Reisner 46391ec861ebSAndreas Gruenbacher drbd_info(connection, "Connection closed\n"); 4640360cc740SPhilipp Reisner 4641bde89a9eSAndreas Gruenbacher if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN) 4642bde89a9eSAndreas Gruenbacher conn_try_outdate_peer_async(connection); 4643cb703454SPhilipp Reisner 46440500813fSAndreas Gruenbacher spin_lock_irq(&connection->resource->req_lock); 4645bde89a9eSAndreas 
Gruenbacher oc = connection->cstate; 4646bbeb641cSPhilipp Reisner if (oc >= C_UNCONNECTED) 4647bde89a9eSAndreas Gruenbacher _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 4648bbeb641cSPhilipp Reisner 46490500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 4650360cc740SPhilipp Reisner 4651f3dfa40aSLars Ellenberg if (oc == C_DISCONNECTING) 4652bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); 4653360cc740SPhilipp Reisner } 4654360cc740SPhilipp Reisner 465569a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *peer_device) 4656360cc740SPhilipp Reisner { 465769a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 4658360cc740SPhilipp Reisner unsigned int i; 4659b411b363SPhilipp Reisner 466085719573SPhilipp Reisner /* wait for current activity to cease. */ 46610500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 4662b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->active_ee); 4663b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->sync_ee); 4664b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->read_ee); 46650500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4666b411b363SPhilipp Reisner 4667b411b363SPhilipp Reisner /* We do not have data structures that would allow us to 4668b411b363SPhilipp Reisner * get the rs_pending_cnt down to 0 again. 4669b411b363SPhilipp Reisner * * On C_SYNC_TARGET we do not have any data structures describing 4670b411b363SPhilipp Reisner * the pending RSDataRequest's we have sent. 4671b411b363SPhilipp Reisner * * On C_SYNC_SOURCE there is no data structure that tracks 4672b411b363SPhilipp Reisner * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. 4673b411b363SPhilipp Reisner * And no, it is not the sum of the reference counts in the 4674b411b363SPhilipp Reisner * resync_LRU. 
The resync_LRU tracks the whole operation including 4675b411b363SPhilipp Reisner * the disk-IO, while the rs_pending_cnt only tracks the blocks 4676b411b363SPhilipp Reisner * on the fly. */ 4677b30ab791SAndreas Gruenbacher drbd_rs_cancel_all(device); 4678b30ab791SAndreas Gruenbacher device->rs_total = 0; 4679b30ab791SAndreas Gruenbacher device->rs_failed = 0; 4680b30ab791SAndreas Gruenbacher atomic_set(&device->rs_pending_cnt, 0); 4681b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 4682b411b363SPhilipp Reisner 4683b30ab791SAndreas Gruenbacher del_timer_sync(&device->resync_timer); 4684b30ab791SAndreas Gruenbacher resync_timer_fn((unsigned long)device); 4685b411b363SPhilipp Reisner 4686b411b363SPhilipp Reisner /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 4687b411b363SPhilipp Reisner * w_make_resync_request etc. which may still be on the worker queue 4688b411b363SPhilipp Reisner * to be "canceled" */ 4689b5043c5eSAndreas Gruenbacher drbd_flush_workqueue(&peer_device->connection->sender_work); 4690b411b363SPhilipp Reisner 4691b30ab791SAndreas Gruenbacher drbd_finish_peer_reqs(device); 4692b411b363SPhilipp Reisner 4693d10b4ea3SPhilipp Reisner /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() 4694d10b4ea3SPhilipp Reisner might have issued a work again. The one before drbd_finish_peer_reqs() is 4695d10b4ea3SPhilipp Reisner necessary to reclain net_ee in drbd_finish_peer_reqs(). */ 4696b5043c5eSAndreas Gruenbacher drbd_flush_workqueue(&peer_device->connection->sender_work); 4697d10b4ea3SPhilipp Reisner 469808332d73SLars Ellenberg /* need to do it again, drbd_finish_peer_reqs() may have populated it 469908332d73SLars Ellenberg * again via drbd_try_clear_on_disk_bm(). 
*/ 4700b30ab791SAndreas Gruenbacher drbd_rs_cancel_all(device); 4701b411b363SPhilipp Reisner 4702b30ab791SAndreas Gruenbacher kfree(device->p_uuid); 4703b30ab791SAndreas Gruenbacher device->p_uuid = NULL; 4704b411b363SPhilipp Reisner 4705b30ab791SAndreas Gruenbacher if (!drbd_suspended(device)) 470669a22773SAndreas Gruenbacher tl_clear(peer_device->connection); 4707b411b363SPhilipp Reisner 4708b30ab791SAndreas Gruenbacher drbd_md_sync(device); 4709b411b363SPhilipp Reisner 471020ceb2b2SLars Ellenberg /* serialize with bitmap writeout triggered by the state change, 471120ceb2b2SLars Ellenberg * if any. */ 4712b30ab791SAndreas Gruenbacher wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 471320ceb2b2SLars Ellenberg 4714b411b363SPhilipp Reisner /* tcp_close and release of sendpage pages can be deferred. I don't 4715b411b363SPhilipp Reisner * want to use SO_LINGER, because apparently it can be deferred for 4716b411b363SPhilipp Reisner * more than 20 seconds (longest time I checked). 4717b411b363SPhilipp Reisner * 4718b411b363SPhilipp Reisner * Actually we don't care for exactly when the network stack does its 4719b411b363SPhilipp Reisner * put_page(), but release our reference on these pages right here. 
4720b411b363SPhilipp Reisner */ 4721b30ab791SAndreas Gruenbacher i = drbd_free_peer_reqs(device, &device->net_ee); 4722b411b363SPhilipp Reisner if (i) 4723d0180171SAndreas Gruenbacher drbd_info(device, "net_ee not empty, killed %u entries\n", i); 4724b30ab791SAndreas Gruenbacher i = atomic_read(&device->pp_in_use_by_net); 4725435f0740SLars Ellenberg if (i) 4726d0180171SAndreas Gruenbacher drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); 4727b30ab791SAndreas Gruenbacher i = atomic_read(&device->pp_in_use); 4728b411b363SPhilipp Reisner if (i) 4729d0180171SAndreas Gruenbacher drbd_info(device, "pp_in_use = %d, expected 0\n", i); 4730b411b363SPhilipp Reisner 47310b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->read_ee)); 47320b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->active_ee)); 47330b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->sync_ee)); 47340b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->done_ee)); 4735b411b363SPhilipp Reisner 4736360cc740SPhilipp Reisner return 0; 4737b411b363SPhilipp Reisner } 4738b411b363SPhilipp Reisner 4739b411b363SPhilipp Reisner /* 4740b411b363SPhilipp Reisner * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version 4741b411b363SPhilipp Reisner * we can agree on is stored in agreed_pro_version. 4742b411b363SPhilipp Reisner * 4743b411b363SPhilipp Reisner * feature flags and the reserved array should be enough room for future 4744b411b363SPhilipp Reisner * enhancements of the handshake protocol, and possible plugins... 4745b411b363SPhilipp Reisner * 4746b411b363SPhilipp Reisner * for now, they are expected to be zero, but ignored. 
 */
/* Send our P_CONNECTION_FEATURES packet: advertised min/max protocol
 * versions plus our feature flag set (PRO_FEATURES).  The rest of the
 * packet is zeroed.  Returns 0 on success, -EIO if the socket's command
 * buffer could not be prepared. */
static int drbd_send_features(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	struct p_connection_features *p;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	p->feature_flags = cpu_to_be32(PRO_FEATURES);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}

/*
 * return values:
 *  1 yes, we have a valid connection
 *  0 oops, did not work out, please try again
 * -1 peer talks different language,
 *    no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our features first, then expect the peer's in return. */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert the on-the-wire (big endian) fields in place. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		/* presumably a peer that did not fill in protocol_max;
		 * treat protocol_min as the only version it speaks */
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest version both sides support, and on the
	 * intersection of the feature flag sets. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}

#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks HMAC support: always refuses, so a
 * configured 'cram-hmac-alg' cannot silently go unauthenticated. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN	64

/* Return value:
	1 - auth succeeded,
	0 - failed, try again (network
error),
	-1 - auth failed, don't try again.
*/

/* Challenge/response mutual authentication using the configured
 * cram-hmac transform keyed with the shared secret.  We send our
 * challenge, HMAC the peer's challenge back to it, and verify the
 * peer's HMAC of our challenge.  rv follows the 1/0/-1 contract above;
 * err holds receive errors (mapped to rv = 0, i.e. retry). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the secret out under RCU; net_conf may be replaced at any time. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Reject absurd challenge sizes before allocating for them. */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer reflecting our own challenge could replay our response. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): memcmp() is not constant-time; a timing-safe compare
	 * (e.g. crypto_memneq) would be preferable for the HMAC check —
	 * confirm against the intended threat model. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditionally free all buffers. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

/* Receiver thread entry point: keep trying conn_connect() until it
 * succeeds (h > 0, run the drbdd() receive loop), is told to retry
 * (h == 0, disconnect and sleep a second), or fails permanently
 * (h == -1, drop the network configuration). */
int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

/* Record the peer's reply to a connection-wide state change request and
 * wake whoever is waiting on ping_wait for it. */
static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

/* Record the peer's reply to a per-device state change request.  With a
 * pending connection-wide request (pre-protocol-100 peers), delegate to
 * got_conn_RqSReply() instead. */
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

/* Answer a keep-alive ping from the peer. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}

/* Our ping was acknowledged: restore the idle receive timeout and wake
 * any waiter on ping_wait. */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	/* NOTE(review): net_conf is dereferenced here without rcu_dereference,
	 * unlike drbd_do_auth() above — verify this access is safe in this
	 * context. */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

/* The peer confirmed a block is already in sync (checksum-based resync):
 * mark it in sync locally and update resync accounting. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* blksize >> 9: convert bytes to 512-byte sectors */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

/* Look up the request identified by (id, sector) in the given tree and
 * apply the state transition 'what' to it under the request lock.
 * Returns -EIO when the request is not found (unless missing_ok lets
 * find_request tolerate that), 0 otherwise.  Completes the master bio
 * outside the lock if the transition finished it. */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

5167bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi) 5168b411b363SPhilipp Reisner { 51699f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5170b30ab791SAndreas Gruenbacher struct drbd_device *device; 5171e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5172b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5173b411b363SPhilipp Reisner int blksize = be32_to_cpu(p->blksize); 5174b411b363SPhilipp Reisner enum drbd_req_event what; 5175b411b363SPhilipp Reisner 51769f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 51779f4fe9adSAndreas Gruenbacher if (!peer_device) 51782735a594SAndreas Gruenbacher return -EIO; 51799f4fe9adSAndreas Gruenbacher device = peer_device->device; 51801952e916SAndreas Gruenbacher 518169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5182b411b363SPhilipp Reisner 5183579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) { 5184b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, sector, blksize); 5185b30ab791SAndreas Gruenbacher dec_rs_pending(device); 51862735a594SAndreas Gruenbacher return 0; 5187b411b363SPhilipp Reisner } 5188e05e1e59SAndreas Gruenbacher switch (pi->cmd) { 5189b411b363SPhilipp Reisner case P_RS_WRITE_ACK: 51908554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER_AND_SIS; 5191b411b363SPhilipp Reisner break; 5192b411b363SPhilipp Reisner case P_WRITE_ACK: 51938554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER; 5194b411b363SPhilipp Reisner break; 5195b411b363SPhilipp Reisner case P_RECV_ACK: 51968554df1cSAndreas Gruenbacher what = RECV_ACKED_BY_PEER; 5197b411b363SPhilipp Reisner break; 5198d4dabbe2SLars Ellenberg case P_SUPERSEDED: 5199d4dabbe2SLars Ellenberg what = CONFLICT_RESOLVED; 52007be8da07SAndreas Gruenbacher break; 52017be8da07SAndreas Gruenbacher case P_RETRY_WRITE: 52027be8da07SAndreas Gruenbacher what = POSTPONE_WRITE; 
5203b411b363SPhilipp Reisner break; 5204b411b363SPhilipp Reisner default: 52052735a594SAndreas Gruenbacher BUG(); 5206b411b363SPhilipp Reisner } 5207b411b363SPhilipp Reisner 5208b30ab791SAndreas Gruenbacher return validate_req_change_req_state(device, p->block_id, sector, 5209b30ab791SAndreas Gruenbacher &device->write_requests, __func__, 5210bc9c5c41SAndreas Gruenbacher what, false); 5211b411b363SPhilipp Reisner } 5212b411b363SPhilipp Reisner 5213bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi) 5214b411b363SPhilipp Reisner { 52159f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5216b30ab791SAndreas Gruenbacher struct drbd_device *device; 5217e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5218b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 52192deb8336SPhilipp Reisner int size = be32_to_cpu(p->blksize); 522085997675SAndreas Gruenbacher int err; 5221b411b363SPhilipp Reisner 52229f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 52239f4fe9adSAndreas Gruenbacher if (!peer_device) 52242735a594SAndreas Gruenbacher return -EIO; 52259f4fe9adSAndreas Gruenbacher device = peer_device->device; 5226b411b363SPhilipp Reisner 522769a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5228b411b363SPhilipp Reisner 5229579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) { 5230b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5231b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 52322735a594SAndreas Gruenbacher return 0; 5233b411b363SPhilipp Reisner } 52342deb8336SPhilipp Reisner 5235b30ab791SAndreas Gruenbacher err = validate_req_change_req_state(device, p->block_id, sector, 5236b30ab791SAndreas Gruenbacher &device->write_requests, __func__, 5237303d1448SPhilipp Reisner NEG_ACKED, true); 523885997675SAndreas Gruenbacher if (err) { 52392deb8336SPhilipp Reisner /* Protocol A has no 
P_WRITE_ACKs, but has P_NEG_ACKs. 52402deb8336SPhilipp Reisner The master bio might already be completed, therefore the 5241c3afd8f5SAndreas Gruenbacher request is no longer in the collision hash. */ 52422deb8336SPhilipp Reisner /* In Protocol B we might already have got a P_RECV_ACK 52432deb8336SPhilipp Reisner but then get a P_NEG_ACK afterwards. */ 5244b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 52452deb8336SPhilipp Reisner } 52462735a594SAndreas Gruenbacher return 0; 5247b411b363SPhilipp Reisner } 5248b411b363SPhilipp Reisner 5249bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) 5250b411b363SPhilipp Reisner { 52519f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5252b30ab791SAndreas Gruenbacher struct drbd_device *device; 5253e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5254b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5255b411b363SPhilipp Reisner 52569f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 52579f4fe9adSAndreas Gruenbacher if (!peer_device) 52582735a594SAndreas Gruenbacher return -EIO; 52599f4fe9adSAndreas Gruenbacher device = peer_device->device; 52601952e916SAndreas Gruenbacher 526169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 52627be8da07SAndreas Gruenbacher 5263d0180171SAndreas Gruenbacher drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", 5264b411b363SPhilipp Reisner (unsigned long long)sector, be32_to_cpu(p->blksize)); 5265b411b363SPhilipp Reisner 5266b30ab791SAndreas Gruenbacher return validate_req_change_req_state(device, p->block_id, sector, 5267b30ab791SAndreas Gruenbacher &device->read_requests, __func__, 52688554df1cSAndreas Gruenbacher NEG_ACKED, false); 5269b411b363SPhilipp Reisner } 5270b411b363SPhilipp Reisner 5271bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct 
packet_info *pi) 5272b411b363SPhilipp Reisner { 52739f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5274b30ab791SAndreas Gruenbacher struct drbd_device *device; 5275b411b363SPhilipp Reisner sector_t sector; 5276b411b363SPhilipp Reisner int size; 5277e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 52781952e916SAndreas Gruenbacher 52799f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 52809f4fe9adSAndreas Gruenbacher if (!peer_device) 52812735a594SAndreas Gruenbacher return -EIO; 52829f4fe9adSAndreas Gruenbacher device = peer_device->device; 5283b411b363SPhilipp Reisner 5284b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5285b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 5286b411b363SPhilipp Reisner 528769a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5288b411b363SPhilipp Reisner 5289b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5290b411b363SPhilipp Reisner 5291b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_FAILED)) { 5292b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5293e05e1e59SAndreas Gruenbacher switch (pi->cmd) { 5294d612d309SPhilipp Reisner case P_NEG_RS_DREPLY: 5295b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 5296d612d309SPhilipp Reisner case P_RS_CANCEL: 5297d612d309SPhilipp Reisner break; 5298d612d309SPhilipp Reisner default: 52992735a594SAndreas Gruenbacher BUG(); 5300d612d309SPhilipp Reisner } 5301b30ab791SAndreas Gruenbacher put_ldev(device); 5302b411b363SPhilipp Reisner } 5303b411b363SPhilipp Reisner 53042735a594SAndreas Gruenbacher return 0; 5305b411b363SPhilipp Reisner } 5306b411b363SPhilipp Reisner 5307bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) 5308b411b363SPhilipp Reisner { 5309e658983aSAndreas Gruenbacher struct p_barrier_ack *p = pi->data; 5310c06ece6bSAndreas Gruenbacher struct drbd_peer_device 
*peer_device; 53119ed57dcbSLars Ellenberg int vnr; 5312b411b363SPhilipp Reisner 5313bde89a9eSAndreas Gruenbacher tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); 5314b411b363SPhilipp Reisner 53159ed57dcbSLars Ellenberg rcu_read_lock(); 5316c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 5317c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 5318c06ece6bSAndreas Gruenbacher 5319b30ab791SAndreas Gruenbacher if (device->state.conn == C_AHEAD && 5320b30ab791SAndreas Gruenbacher atomic_read(&device->ap_in_flight) == 0 && 5321b30ab791SAndreas Gruenbacher !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { 5322b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ; 5323b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 5324c4752ef1SPhilipp Reisner } 53259ed57dcbSLars Ellenberg } 53269ed57dcbSLars Ellenberg rcu_read_unlock(); 5327c4752ef1SPhilipp Reisner 53282735a594SAndreas Gruenbacher return 0; 5329b411b363SPhilipp Reisner } 5330b411b363SPhilipp Reisner 5331bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) 5332b411b363SPhilipp Reisner { 53339f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5334b30ab791SAndreas Gruenbacher struct drbd_device *device; 5335e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 533684b8c06bSAndreas Gruenbacher struct drbd_device_work *dw; 5337b411b363SPhilipp Reisner sector_t sector; 5338b411b363SPhilipp Reisner int size; 5339b411b363SPhilipp Reisner 53409f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 53419f4fe9adSAndreas Gruenbacher if (!peer_device) 53422735a594SAndreas Gruenbacher return -EIO; 53439f4fe9adSAndreas Gruenbacher device = peer_device->device; 53441952e916SAndreas Gruenbacher 5345b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5346b411b363SPhilipp Reisner size = 
be32_to_cpu(p->blksize); 5347b411b363SPhilipp Reisner 534869a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5349b411b363SPhilipp Reisner 5350b411b363SPhilipp Reisner if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) 5351b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 5352b411b363SPhilipp Reisner else 5353b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5354b411b363SPhilipp Reisner 5355b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 53562735a594SAndreas Gruenbacher return 0; 53571d53f09eSLars Ellenberg 5358b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5359b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5360b411b363SPhilipp Reisner 5361b30ab791SAndreas Gruenbacher --device->ov_left; 5362ea5442afSLars Ellenberg 5363ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 5364b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 5365b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 5366ea5442afSLars Ellenberg 5367b30ab791SAndreas Gruenbacher if (device->ov_left == 0) { 536884b8c06bSAndreas Gruenbacher dw = kmalloc(sizeof(*dw), GFP_NOIO); 536984b8c06bSAndreas Gruenbacher if (dw) { 537084b8c06bSAndreas Gruenbacher dw->w.cb = w_ov_finished; 537184b8c06bSAndreas Gruenbacher dw->device = device; 537284b8c06bSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &dw->w); 5373b411b363SPhilipp Reisner } else { 537484b8c06bSAndreas Gruenbacher drbd_err(device, "kmalloc(dw) failed."); 5375b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5376b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 5377b411b363SPhilipp Reisner } 5378b411b363SPhilipp Reisner } 5379b30ab791SAndreas Gruenbacher put_ldev(device); 53802735a594SAndreas Gruenbacher return 0; 5381b411b363SPhilipp Reisner } 5382b411b363SPhilipp Reisner 5383bde89a9eSAndreas Gruenbacher static int got_skip(struct 
drbd_connection *connection, struct packet_info *pi) 53840ced55a3SPhilipp Reisner { 53852735a594SAndreas Gruenbacher return 0; 53860ced55a3SPhilipp Reisner } 53870ced55a3SPhilipp Reisner 5388bde89a9eSAndreas Gruenbacher static int connection_finish_peer_reqs(struct drbd_connection *connection) 538932862ec7SPhilipp Reisner { 5390c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5391c141ebdaSPhilipp Reisner int vnr, not_empty = 0; 539232862ec7SPhilipp Reisner 539332862ec7SPhilipp Reisner do { 5394bde89a9eSAndreas Gruenbacher clear_bit(SIGNAL_ASENDER, &connection->flags); 539532862ec7SPhilipp Reisner flush_signals(current); 5396c141ebdaSPhilipp Reisner 5397c141ebdaSPhilipp Reisner rcu_read_lock(); 5398c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 5399c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 5400b30ab791SAndreas Gruenbacher kref_get(&device->kref); 5401c141ebdaSPhilipp Reisner rcu_read_unlock(); 5402b30ab791SAndreas Gruenbacher if (drbd_finish_peer_reqs(device)) { 540305a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 5404c141ebdaSPhilipp Reisner return 1; 5405082a3439SPhilipp Reisner } 540605a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 5407c141ebdaSPhilipp Reisner rcu_read_lock(); 5408d3fcb490SPhilipp Reisner } 5409bde89a9eSAndreas Gruenbacher set_bit(SIGNAL_ASENDER, &connection->flags); 5410082a3439SPhilipp Reisner 54110500813fSAndreas Gruenbacher spin_lock_irq(&connection->resource->req_lock); 5412c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 5413c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 5414b30ab791SAndreas Gruenbacher not_empty = !list_empty(&device->done_ee); 5415082a3439SPhilipp Reisner if (not_empty) 5416082a3439SPhilipp Reisner break; 5417082a3439SPhilipp Reisner } 54180500813fSAndreas Gruenbacher 
spin_unlock_irq(&connection->resource->req_lock); 5419c141ebdaSPhilipp Reisner rcu_read_unlock(); 542032862ec7SPhilipp Reisner } while (not_empty); 542132862ec7SPhilipp Reisner 542232862ec7SPhilipp Reisner return 0; 5423b411b363SPhilipp Reisner } 5424b411b363SPhilipp Reisner 5425b411b363SPhilipp Reisner struct asender_cmd { 5426b411b363SPhilipp Reisner size_t pkt_size; 5427bde89a9eSAndreas Gruenbacher int (*fn)(struct drbd_connection *connection, struct packet_info *); 5428b411b363SPhilipp Reisner }; 5429b411b363SPhilipp Reisner 5430b411b363SPhilipp Reisner static struct asender_cmd asender_tbl[] = { 5431e658983aSAndreas Gruenbacher [P_PING] = { 0, got_Ping }, 5432e658983aSAndreas Gruenbacher [P_PING_ACK] = { 0, got_PingAck }, 5433b411b363SPhilipp Reisner [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5434b411b363SPhilipp Reisner [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5435b411b363SPhilipp Reisner [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5436d4dabbe2SLars Ellenberg [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck }, 5437b411b363SPhilipp Reisner [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, 5438b411b363SPhilipp Reisner [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 5439b411b363SPhilipp Reisner [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, 5440b411b363SPhilipp Reisner [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 5441b411b363SPhilipp Reisner [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 5442b411b363SPhilipp Reisner [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 5443b411b363SPhilipp Reisner [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 544402918be2SPhilipp Reisner [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 5445d612d309SPhilipp Reisner [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, 54461952e916SAndreas Gruenbacher 
[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, 54471952e916SAndreas Gruenbacher [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, 5448b411b363SPhilipp Reisner }; 5449b411b363SPhilipp Reisner 5450b411b363SPhilipp Reisner int drbd_asender(struct drbd_thread *thi) 5451b411b363SPhilipp Reisner { 5452bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 5453b411b363SPhilipp Reisner struct asender_cmd *cmd = NULL; 545477351055SPhilipp Reisner struct packet_info pi; 5455257d0af6SPhilipp Reisner int rv; 5456bde89a9eSAndreas Gruenbacher void *buf = connection->meta.rbuf; 5457b411b363SPhilipp Reisner int received = 0; 5458bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 545952b061a4SAndreas Gruenbacher int expect = header_size; 546044ed167dSPhilipp Reisner bool ping_timeout_active = false; 546144ed167dSPhilipp Reisner struct net_conf *nc; 5462bb77d34eSAndreas Gruenbacher int ping_timeo, tcp_cork, ping_int; 54633990e04dSPhilipp Reisner struct sched_param param = { .sched_priority = 2 }; 5464b411b363SPhilipp Reisner 54653990e04dSPhilipp Reisner rv = sched_setscheduler(current, SCHED_RR, ¶m); 54663990e04dSPhilipp Reisner if (rv < 0) 54671ec861ebSAndreas Gruenbacher drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv); 5468b411b363SPhilipp Reisner 5469e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 547080822284SPhilipp Reisner drbd_thread_current_set_cpu(thi); 547144ed167dSPhilipp Reisner 547244ed167dSPhilipp Reisner rcu_read_lock(); 5473bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 547444ed167dSPhilipp Reisner ping_timeo = nc->ping_timeo; 5475bb77d34eSAndreas Gruenbacher tcp_cork = nc->tcp_cork; 547644ed167dSPhilipp Reisner ping_int = nc->ping_int; 547744ed167dSPhilipp Reisner rcu_read_unlock(); 547844ed167dSPhilipp Reisner 5479bde89a9eSAndreas Gruenbacher if (test_and_clear_bit(SEND_PING, 
&connection->flags)) { 5480bde89a9eSAndreas Gruenbacher if (drbd_send_ping(connection)) { 54811ec861ebSAndreas Gruenbacher drbd_err(connection, "drbd_send_ping has failed\n"); 5482841ce241SAndreas Gruenbacher goto reconnect; 5483841ce241SAndreas Gruenbacher } 5484bde89a9eSAndreas Gruenbacher connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; 548544ed167dSPhilipp Reisner ping_timeout_active = true; 5486b411b363SPhilipp Reisner } 5487b411b363SPhilipp Reisner 548832862ec7SPhilipp Reisner /* TODO: conditionally cork; it may hurt latency if we cork without 548932862ec7SPhilipp Reisner much to send */ 5490bb77d34eSAndreas Gruenbacher if (tcp_cork) 5491bde89a9eSAndreas Gruenbacher drbd_tcp_cork(connection->meta.socket); 5492bde89a9eSAndreas Gruenbacher if (connection_finish_peer_reqs(connection)) { 54931ec861ebSAndreas Gruenbacher drbd_err(connection, "connection_finish_peer_reqs() failed\n"); 5494b411b363SPhilipp Reisner goto reconnect; 5495b411b363SPhilipp Reisner } 5496b411b363SPhilipp Reisner /* but unconditionally uncork unless disabled */ 5497bb77d34eSAndreas Gruenbacher if (tcp_cork) 5498bde89a9eSAndreas Gruenbacher drbd_tcp_uncork(connection->meta.socket); 5499b411b363SPhilipp Reisner 5500b411b363SPhilipp Reisner /* short circuit, recv_msg would return EINTR anyways. 
*/ 5501b411b363SPhilipp Reisner if (signal_pending(current)) 5502b411b363SPhilipp Reisner continue; 5503b411b363SPhilipp Reisner 5504bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); 5505bde89a9eSAndreas Gruenbacher clear_bit(SIGNAL_ASENDER, &connection->flags); 5506b411b363SPhilipp Reisner 5507b411b363SPhilipp Reisner flush_signals(current); 5508b411b363SPhilipp Reisner 5509b411b363SPhilipp Reisner /* Note: 5510b411b363SPhilipp Reisner * -EINTR (on meta) we got a signal 5511b411b363SPhilipp Reisner * -EAGAIN (on meta) rcvtimeo expired 5512b411b363SPhilipp Reisner * -ECONNRESET other side closed the connection 5513b411b363SPhilipp Reisner * -ERESTARTSYS (on data) we got a signal 5514b411b363SPhilipp Reisner * rv < 0 other than above: unexpected error! 5515b411b363SPhilipp Reisner * rv == expected: full header or command 5516b411b363SPhilipp Reisner * rv < expected: "woken" by signal during receive 5517b411b363SPhilipp Reisner * rv == 0 : "connection shut down by peer" 5518b411b363SPhilipp Reisner */ 5519b411b363SPhilipp Reisner if (likely(rv > 0)) { 5520b411b363SPhilipp Reisner received += rv; 5521b411b363SPhilipp Reisner buf += rv; 5522b411b363SPhilipp Reisner } else if (rv == 0) { 5523bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 5524b66623e3SPhilipp Reisner long t; 5525b66623e3SPhilipp Reisner rcu_read_lock(); 5526bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 5527b66623e3SPhilipp Reisner rcu_read_unlock(); 5528b66623e3SPhilipp Reisner 5529bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, 5530bde89a9eSAndreas Gruenbacher connection->cstate < C_WF_REPORT_PARAMS, 5531b66623e3SPhilipp Reisner t); 5532599377acSPhilipp Reisner if (t) 5533599377acSPhilipp Reisner break; 5534599377acSPhilipp Reisner } 55351ec861ebSAndreas Gruenbacher drbd_err(connection, "meta connection shut down by peer.\n"); 5536b411b363SPhilipp 
Reisner goto reconnect; 5537b411b363SPhilipp Reisner } else if (rv == -EAGAIN) { 5538cb6518cbSLars Ellenberg /* If the data socket received something meanwhile, 5539cb6518cbSLars Ellenberg * that is good enough: peer is still alive. */ 5540bde89a9eSAndreas Gruenbacher if (time_after(connection->last_received, 5541bde89a9eSAndreas Gruenbacher jiffies - connection->meta.socket->sk->sk_rcvtimeo)) 5542cb6518cbSLars Ellenberg continue; 5543f36af18cSLars Ellenberg if (ping_timeout_active) { 55441ec861ebSAndreas Gruenbacher drbd_err(connection, "PingAck did not arrive in time.\n"); 5545b411b363SPhilipp Reisner goto reconnect; 5546b411b363SPhilipp Reisner } 5547bde89a9eSAndreas Gruenbacher set_bit(SEND_PING, &connection->flags); 5548b411b363SPhilipp Reisner continue; 5549b411b363SPhilipp Reisner } else if (rv == -EINTR) { 5550b411b363SPhilipp Reisner continue; 5551b411b363SPhilipp Reisner } else { 55521ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 5553b411b363SPhilipp Reisner goto reconnect; 5554b411b363SPhilipp Reisner } 5555b411b363SPhilipp Reisner 5556b411b363SPhilipp Reisner if (received == expect && cmd == NULL) { 5557bde89a9eSAndreas Gruenbacher if (decode_header(connection, connection->meta.rbuf, &pi)) 5558b411b363SPhilipp Reisner goto reconnect; 55597201b972SAndreas Gruenbacher cmd = &asender_tbl[pi.cmd]; 55601952e916SAndreas Gruenbacher if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { 55611ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", 55622fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd); 5563b411b363SPhilipp Reisner goto disconnect; 5564b411b363SPhilipp Reisner } 5565e658983aSAndreas Gruenbacher expect = header_size + cmd->pkt_size; 556652b061a4SAndreas Gruenbacher if (pi.size != expect - header_size) { 55671ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", 556877351055SPhilipp Reisner pi.cmd, pi.size); 5569b411b363SPhilipp Reisner 
goto reconnect; 5570b411b363SPhilipp Reisner } 5571257d0af6SPhilipp Reisner } 5572b411b363SPhilipp Reisner if (received == expect) { 55732735a594SAndreas Gruenbacher bool err; 5574a4fbda8eSPhilipp Reisner 5575bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi); 55762735a594SAndreas Gruenbacher if (err) { 55771ec861ebSAndreas Gruenbacher drbd_err(connection, "%pf failed\n", cmd->fn); 5578b411b363SPhilipp Reisner goto reconnect; 55791952e916SAndreas Gruenbacher } 5580b411b363SPhilipp Reisner 5581bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 5582f36af18cSLars Ellenberg 558344ed167dSPhilipp Reisner if (cmd == &asender_tbl[P_PING_ACK]) { 558444ed167dSPhilipp Reisner /* restore idle timeout */ 5585bde89a9eSAndreas Gruenbacher connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; 558644ed167dSPhilipp Reisner ping_timeout_active = false; 558744ed167dSPhilipp Reisner } 5588b411b363SPhilipp Reisner 5589bde89a9eSAndreas Gruenbacher buf = connection->meta.rbuf; 5590b411b363SPhilipp Reisner received = 0; 559152b061a4SAndreas Gruenbacher expect = header_size; 5592b411b363SPhilipp Reisner cmd = NULL; 5593b411b363SPhilipp Reisner } 5594b411b363SPhilipp Reisner } 5595b411b363SPhilipp Reisner 5596b411b363SPhilipp Reisner if (0) { 5597b411b363SPhilipp Reisner reconnect: 5598bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 5599bde89a9eSAndreas Gruenbacher conn_md_sync(connection); 5600b411b363SPhilipp Reisner } 5601b411b363SPhilipp Reisner if (0) { 5602b411b363SPhilipp Reisner disconnect: 5603bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 5604b411b363SPhilipp Reisner } 5605bde89a9eSAndreas Gruenbacher clear_bit(SIGNAL_ASENDER, &connection->flags); 5606b411b363SPhilipp Reisner 56071ec861ebSAndreas Gruenbacher drbd_info(connection, "asender terminated\n"); 5608b411b363SPhilipp Reisner 5609b411b363SPhilipp Reisner return 0; 5610b411b363SPhilipp Reisner } 5611