1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_receiver.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
23b411b363SPhilipp Reisner */ 24b411b363SPhilipp Reisner 25b411b363SPhilipp Reisner 26b411b363SPhilipp Reisner #include <linux/module.h> 27b411b363SPhilipp Reisner 28b411b363SPhilipp Reisner #include <asm/uaccess.h> 29b411b363SPhilipp Reisner #include <net/sock.h> 30b411b363SPhilipp Reisner 31b411b363SPhilipp Reisner #include <linux/drbd.h> 32b411b363SPhilipp Reisner #include <linux/fs.h> 33b411b363SPhilipp Reisner #include <linux/file.h> 34b411b363SPhilipp Reisner #include <linux/in.h> 35b411b363SPhilipp Reisner #include <linux/mm.h> 36b411b363SPhilipp Reisner #include <linux/memcontrol.h> 37b411b363SPhilipp Reisner #include <linux/mm_inline.h> 38b411b363SPhilipp Reisner #include <linux/slab.h> 39b411b363SPhilipp Reisner #include <linux/pkt_sched.h> 40b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__ 41b411b363SPhilipp Reisner #include <linux/unistd.h> 42b411b363SPhilipp Reisner #include <linux/vmalloc.h> 43b411b363SPhilipp Reisner #include <linux/random.h> 44b411b363SPhilipp Reisner #include <linux/string.h> 45b411b363SPhilipp Reisner #include <linux/scatterlist.h> 46b411b363SPhilipp Reisner #include "drbd_int.h" 47a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 48b411b363SPhilipp Reisner #include "drbd_req.h" 49b411b363SPhilipp Reisner #include "drbd_vli.h" 50b411b363SPhilipp Reisner 5120c68fdeSLars Ellenberg #define PRO_FEATURES (FF_TRIM) 5220c68fdeSLars Ellenberg 5377351055SPhilipp Reisner struct packet_info { 5477351055SPhilipp Reisner enum drbd_packet cmd; 55e2857216SAndreas Gruenbacher unsigned int size; 56e2857216SAndreas Gruenbacher unsigned int vnr; 57e658983aSAndreas Gruenbacher void *data; 5877351055SPhilipp Reisner }; 5977351055SPhilipp Reisner 60b411b363SPhilipp Reisner enum finish_epoch { 61b411b363SPhilipp Reisner FE_STILL_LIVE, 62b411b363SPhilipp Reisner FE_DESTROYED, 63b411b363SPhilipp Reisner FE_RECYCLED, 64b411b363SPhilipp Reisner }; 65b411b363SPhilipp Reisner 66bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct 
drbd_connection *connection); 67bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection); 6869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *); 69a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection); 70bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); 7199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int); 72b411b363SPhilipp Reisner 73b411b363SPhilipp Reisner 74b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 75b411b363SPhilipp Reisner 7645bb912bSLars Ellenberg /* 7745bb912bSLars Ellenberg * some helper functions to deal with single linked page lists, 7845bb912bSLars Ellenberg * page->private being our "next" pointer. 7945bb912bSLars Ellenberg */ 8045bb912bSLars Ellenberg 8145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off. 8245bb912bSLars Ellenberg * Otherwise, don't modify head, and return NULL. 8345bb912bSLars Ellenberg * Locking is the responsibility of the caller. 
8445bb912bSLars Ellenberg */ 8545bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n) 8645bb912bSLars Ellenberg { 8745bb912bSLars Ellenberg struct page *page; 8845bb912bSLars Ellenberg struct page *tmp; 8945bb912bSLars Ellenberg 9045bb912bSLars Ellenberg BUG_ON(!n); 9145bb912bSLars Ellenberg BUG_ON(!head); 9245bb912bSLars Ellenberg 9345bb912bSLars Ellenberg page = *head; 9423ce4227SPhilipp Reisner 9523ce4227SPhilipp Reisner if (!page) 9623ce4227SPhilipp Reisner return NULL; 9723ce4227SPhilipp Reisner 9845bb912bSLars Ellenberg while (page) { 9945bb912bSLars Ellenberg tmp = page_chain_next(page); 10045bb912bSLars Ellenberg if (--n == 0) 10145bb912bSLars Ellenberg break; /* found sufficient pages */ 10245bb912bSLars Ellenberg if (tmp == NULL) 10345bb912bSLars Ellenberg /* insufficient pages, don't use any of them. */ 10445bb912bSLars Ellenberg return NULL; 10545bb912bSLars Ellenberg page = tmp; 10645bb912bSLars Ellenberg } 10745bb912bSLars Ellenberg 10845bb912bSLars Ellenberg /* add end of list marker for the returned list */ 10945bb912bSLars Ellenberg set_page_private(page, 0); 11045bb912bSLars Ellenberg /* actual return value, and adjustment of head */ 11145bb912bSLars Ellenberg page = *head; 11245bb912bSLars Ellenberg *head = tmp; 11345bb912bSLars Ellenberg return page; 11445bb912bSLars Ellenberg } 11545bb912bSLars Ellenberg 11645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short) 11745bb912bSLars Ellenberg * "private" page chain, before adding it back to a global chain head 11845bb912bSLars Ellenberg * with page_chain_add() under a spinlock. 
*/ 11945bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len) 12045bb912bSLars Ellenberg { 12145bb912bSLars Ellenberg struct page *tmp; 12245bb912bSLars Ellenberg int i = 1; 12345bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) 12445bb912bSLars Ellenberg ++i, page = tmp; 12545bb912bSLars Ellenberg if (len) 12645bb912bSLars Ellenberg *len = i; 12745bb912bSLars Ellenberg return page; 12845bb912bSLars Ellenberg } 12945bb912bSLars Ellenberg 13045bb912bSLars Ellenberg static int page_chain_free(struct page *page) 13145bb912bSLars Ellenberg { 13245bb912bSLars Ellenberg struct page *tmp; 13345bb912bSLars Ellenberg int i = 0; 13445bb912bSLars Ellenberg page_chain_for_each_safe(page, tmp) { 13545bb912bSLars Ellenberg put_page(page); 13645bb912bSLars Ellenberg ++i; 13745bb912bSLars Ellenberg } 13845bb912bSLars Ellenberg return i; 13945bb912bSLars Ellenberg } 14045bb912bSLars Ellenberg 14145bb912bSLars Ellenberg static void page_chain_add(struct page **head, 14245bb912bSLars Ellenberg struct page *chain_first, struct page *chain_last) 14345bb912bSLars Ellenberg { 14445bb912bSLars Ellenberg #if 1 14545bb912bSLars Ellenberg struct page *tmp; 14645bb912bSLars Ellenberg tmp = page_chain_tail(chain_first, NULL); 14745bb912bSLars Ellenberg BUG_ON(tmp != chain_last); 14845bb912bSLars Ellenberg #endif 14945bb912bSLars Ellenberg 15045bb912bSLars Ellenberg /* add chain to head */ 15145bb912bSLars Ellenberg set_page_private(chain_last, (unsigned long)*head); 15245bb912bSLars Ellenberg *head = chain_first; 15345bb912bSLars Ellenberg } 15445bb912bSLars Ellenberg 155b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device, 15618c2d522SAndreas Gruenbacher unsigned int number) 157b411b363SPhilipp Reisner { 158b411b363SPhilipp Reisner struct page *page = NULL; 15945bb912bSLars Ellenberg struct page *tmp = NULL; 16018c2d522SAndreas Gruenbacher unsigned int i = 0; 161b411b363SPhilipp Reisner 162b411b363SPhilipp 
Reisner /* Yes, testing drbd_pp_vacant outside the lock is racy. 163b411b363SPhilipp Reisner * So what. It saves a spin_lock. */ 16445bb912bSLars Ellenberg if (drbd_pp_vacant >= number) { 165b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 16645bb912bSLars Ellenberg page = page_chain_del(&drbd_pp_pool, number); 16745bb912bSLars Ellenberg if (page) 16845bb912bSLars Ellenberg drbd_pp_vacant -= number; 169b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 17045bb912bSLars Ellenberg if (page) 17145bb912bSLars Ellenberg return page; 172b411b363SPhilipp Reisner } 17345bb912bSLars Ellenberg 174b411b363SPhilipp Reisner /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD 175b411b363SPhilipp Reisner * "criss-cross" setup, that might cause write-out on some other DRBD, 176b411b363SPhilipp Reisner * which in turn might block on the other node at this very place. */ 17745bb912bSLars Ellenberg for (i = 0; i < number; i++) { 17845bb912bSLars Ellenberg tmp = alloc_page(GFP_TRY); 17945bb912bSLars Ellenberg if (!tmp) 18045bb912bSLars Ellenberg break; 18145bb912bSLars Ellenberg set_page_private(tmp, (unsigned long)page); 18245bb912bSLars Ellenberg page = tmp; 18345bb912bSLars Ellenberg } 18445bb912bSLars Ellenberg 18545bb912bSLars Ellenberg if (i == number) 186b411b363SPhilipp Reisner return page; 18745bb912bSLars Ellenberg 18845bb912bSLars Ellenberg /* Not enough pages immediately available this time. 189c37c8ecfSAndreas Gruenbacher * No need to jump around here, drbd_alloc_pages will retry this 19045bb912bSLars Ellenberg * function "soon". 
*/ 19145bb912bSLars Ellenberg if (page) { 19245bb912bSLars Ellenberg tmp = page_chain_tail(page, NULL); 19345bb912bSLars Ellenberg spin_lock(&drbd_pp_lock); 19445bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 19545bb912bSLars Ellenberg drbd_pp_vacant += i; 19645bb912bSLars Ellenberg spin_unlock(&drbd_pp_lock); 19745bb912bSLars Ellenberg } 19845bb912bSLars Ellenberg return NULL; 199b411b363SPhilipp Reisner } 200b411b363SPhilipp Reisner 201b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device, 202a990be46SAndreas Gruenbacher struct list_head *to_be_freed) 203b411b363SPhilipp Reisner { 204a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req, *tmp; 205b411b363SPhilipp Reisner 206b411b363SPhilipp Reisner /* The EEs are always appended to the end of the list. Since 207b411b363SPhilipp Reisner they are sent in order over the wire, they have to finish 208b411b363SPhilipp Reisner in order. As soon as we see the first not finished we can 209b411b363SPhilipp Reisner stop to examine the list... 
*/ 210b411b363SPhilipp Reisner 211a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { 212045417f7SAndreas Gruenbacher if (drbd_peer_req_has_active_page(peer_req)) 213b411b363SPhilipp Reisner break; 214a8cd15baSAndreas Gruenbacher list_move(&peer_req->w.list, to_be_freed); 215b411b363SPhilipp Reisner } 216b411b363SPhilipp Reisner } 217b411b363SPhilipp Reisner 218668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) 219b411b363SPhilipp Reisner { 220b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 221db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 222b411b363SPhilipp Reisner 2230500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 224b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 2250500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 226a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 227b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 228b411b363SPhilipp Reisner } 229b411b363SPhilipp Reisner 230668700b4SPhilipp Reisner static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) 231668700b4SPhilipp Reisner { 232668700b4SPhilipp Reisner struct drbd_peer_device *peer_device; 233668700b4SPhilipp Reisner int vnr; 234668700b4SPhilipp Reisner 235668700b4SPhilipp Reisner rcu_read_lock(); 236668700b4SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 237668700b4SPhilipp Reisner struct drbd_device *device = peer_device->device; 238668700b4SPhilipp Reisner if (!atomic_read(&device->pp_in_use_by_net)) 239668700b4SPhilipp Reisner continue; 240668700b4SPhilipp Reisner 241668700b4SPhilipp Reisner kref_get(&device->kref); 242668700b4SPhilipp Reisner rcu_read_unlock(); 243668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 244668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 
245668700b4SPhilipp Reisner rcu_read_lock(); 246668700b4SPhilipp Reisner } 247668700b4SPhilipp Reisner rcu_read_unlock(); 248668700b4SPhilipp Reisner } 249668700b4SPhilipp Reisner 250b411b363SPhilipp Reisner /** 251c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) 252b30ab791SAndreas Gruenbacher * @device: DRBD device. 25345bb912bSLars Ellenberg * @number: number of pages requested 25445bb912bSLars Ellenberg * @retry: whether to retry, if not enough pages are available right now 255b411b363SPhilipp Reisner * 25645bb912bSLars Ellenberg * Tries to allocate number pages, first from our own page pool, then from 2570e49d7b0SLars Ellenberg * the kernel. 25845bb912bSLars Ellenberg * Possibly retry until DRBD frees sufficient pages somewhere else. 25945bb912bSLars Ellenberg * 2600e49d7b0SLars Ellenberg * If this allocation would exceed the max_buffers setting, we throttle 2610e49d7b0SLars Ellenberg * allocation (schedule_timeout) to give the system some room to breathe. 2620e49d7b0SLars Ellenberg * 2630e49d7b0SLars Ellenberg * We do not use max-buffers as hard limit, because it could lead to 2640e49d7b0SLars Ellenberg * congestion and further to a distributed deadlock during online-verify or 2650e49d7b0SLars Ellenberg * (checksum based) resync, if the max-buffers, socket buffer sizes and 2660e49d7b0SLars Ellenberg * resync-rate settings are mis-configured. 2670e49d7b0SLars Ellenberg * 26845bb912bSLars Ellenberg * Returns a page chain linked via page->private. 
269b411b363SPhilipp Reisner */ 27069a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, 271c37c8ecfSAndreas Gruenbacher bool retry) 272b411b363SPhilipp Reisner { 27369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 274b411b363SPhilipp Reisner struct page *page = NULL; 27544ed167dSPhilipp Reisner struct net_conf *nc; 276b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2770e49d7b0SLars Ellenberg unsigned int mxb; 278b411b363SPhilipp Reisner 27944ed167dSPhilipp Reisner rcu_read_lock(); 28069a22773SAndreas Gruenbacher nc = rcu_dereference(peer_device->connection->net_conf); 28144ed167dSPhilipp Reisner mxb = nc ? nc->max_buffers : 1000000; 28244ed167dSPhilipp Reisner rcu_read_unlock(); 28344ed167dSPhilipp Reisner 284b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) 285b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 286b411b363SPhilipp Reisner 287668700b4SPhilipp Reisner /* Try to keep the fast path fast, but occasionally we need 288668700b4SPhilipp Reisner * to reclaim the pages we lended to the network stack. 
*/ 289668700b4SPhilipp Reisner if (page && atomic_read(&device->pp_in_use_by_net) > 512) 290668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 291668700b4SPhilipp Reisner 29245bb912bSLars Ellenberg while (page == NULL) { 293b411b363SPhilipp Reisner prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 294b411b363SPhilipp Reisner 295668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 296b411b363SPhilipp Reisner 297b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) { 298b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 299b411b363SPhilipp Reisner if (page) 300b411b363SPhilipp Reisner break; 301b411b363SPhilipp Reisner } 302b411b363SPhilipp Reisner 303b411b363SPhilipp Reisner if (!retry) 304b411b363SPhilipp Reisner break; 305b411b363SPhilipp Reisner 306b411b363SPhilipp Reisner if (signal_pending(current)) { 307d0180171SAndreas Gruenbacher drbd_warn(device, "drbd_alloc_pages interrupted!\n"); 308b411b363SPhilipp Reisner break; 309b411b363SPhilipp Reisner } 310b411b363SPhilipp Reisner 3110e49d7b0SLars Ellenberg if (schedule_timeout(HZ/10) == 0) 3120e49d7b0SLars Ellenberg mxb = UINT_MAX; 313b411b363SPhilipp Reisner } 314b411b363SPhilipp Reisner finish_wait(&drbd_pp_wait, &wait); 315b411b363SPhilipp Reisner 31645bb912bSLars Ellenberg if (page) 317b30ab791SAndreas Gruenbacher atomic_add(number, &device->pp_in_use); 318b411b363SPhilipp Reisner return page; 319b411b363SPhilipp Reisner } 320b411b363SPhilipp Reisner 321c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. 3220500813fSAndreas Gruenbacher * Is also used from inside an other spin_lock_irq(&resource->req_lock); 32345bb912bSLars Ellenberg * Either links the page chain back to the global pool, 32445bb912bSLars Ellenberg * or returns all pages to the system. 
*/ 325b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) 326b411b363SPhilipp Reisner { 327b30ab791SAndreas Gruenbacher atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; 328b411b363SPhilipp Reisner int i; 329435f0740SLars Ellenberg 330a73ff323SLars Ellenberg if (page == NULL) 331a73ff323SLars Ellenberg return; 332a73ff323SLars Ellenberg 3331816a2b4SLars Ellenberg if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) 33445bb912bSLars Ellenberg i = page_chain_free(page); 33545bb912bSLars Ellenberg else { 33645bb912bSLars Ellenberg struct page *tmp; 33745bb912bSLars Ellenberg tmp = page_chain_tail(page, &i); 338b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 33945bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 34045bb912bSLars Ellenberg drbd_pp_vacant += i; 341b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 342b411b363SPhilipp Reisner } 343435f0740SLars Ellenberg i = atomic_sub_return(i, a); 34445bb912bSLars Ellenberg if (i < 0) 345d0180171SAndreas Gruenbacher drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", 346435f0740SLars Ellenberg is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); 347b411b363SPhilipp Reisner wake_up(&drbd_pp_wait); 348b411b363SPhilipp Reisner } 349b411b363SPhilipp Reisner 350b411b363SPhilipp Reisner /* 351b411b363SPhilipp Reisner You need to hold the req_lock: 352b411b363SPhilipp Reisner _drbd_wait_ee_list_empty() 353b411b363SPhilipp Reisner 354b411b363SPhilipp Reisner You must not have the req_lock: 3553967deb1SAndreas Gruenbacher drbd_free_peer_req() 3560db55363SAndreas Gruenbacher drbd_alloc_peer_req() 3577721f567SAndreas Gruenbacher drbd_free_peer_reqs() 358b411b363SPhilipp Reisner drbd_ee_fix_bhs() 359a990be46SAndreas Gruenbacher drbd_finish_peer_reqs() 360b411b363SPhilipp Reisner drbd_clear_done_ee() 361b411b363SPhilipp Reisner drbd_wait_ee_list_empty() 362b411b363SPhilipp Reisner */ 363b411b363SPhilipp Reisner 364f6ffca9fSAndreas Gruenbacher struct drbd_peer_request * 36569a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 366a0fb3c47SLars Ellenberg unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local) 367b411b363SPhilipp Reisner { 36869a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 369db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 370a73ff323SLars Ellenberg struct page *page = NULL; 37145bb912bSLars Ellenberg unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 372b411b363SPhilipp Reisner 373b30ab791SAndreas Gruenbacher if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) 374b411b363SPhilipp Reisner return NULL; 375b411b363SPhilipp Reisner 376db830c46SAndreas Gruenbacher peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 377db830c46SAndreas Gruenbacher if (!peer_req) { 378b411b363SPhilipp Reisner if (!(gfp_mask & __GFP_NOWARN)) 379d0180171SAndreas Gruenbacher drbd_err(device, "%s: allocation failed\n", __func__); 380b411b363SPhilipp Reisner return NULL; 381b411b363SPhilipp Reisner } 382b411b363SPhilipp Reisner 383a0fb3c47SLars 
Ellenberg if (has_payload && data_size) { 384d0164adcSMel Gorman page = drbd_alloc_pages(peer_device, nr_pages, 385d0164adcSMel Gorman gfpflags_allow_blocking(gfp_mask)); 38645bb912bSLars Ellenberg if (!page) 38745bb912bSLars Ellenberg goto fail; 388a73ff323SLars Ellenberg } 389b411b363SPhilipp Reisner 390c5a2c150SLars Ellenberg memset(peer_req, 0, sizeof(*peer_req)); 391c5a2c150SLars Ellenberg INIT_LIST_HEAD(&peer_req->w.list); 392db830c46SAndreas Gruenbacher drbd_clear_interval(&peer_req->i); 393db830c46SAndreas Gruenbacher peer_req->i.size = data_size; 394db830c46SAndreas Gruenbacher peer_req->i.sector = sector; 395c5a2c150SLars Ellenberg peer_req->submit_jif = jiffies; 396a8cd15baSAndreas Gruenbacher peer_req->peer_device = peer_device; 397db830c46SAndreas Gruenbacher peer_req->pages = page; 3989a8e7753SAndreas Gruenbacher /* 3999a8e7753SAndreas Gruenbacher * The block_id is opaque to the receiver. It is not endianness 4009a8e7753SAndreas Gruenbacher * converted, and sent back to the sender unchanged. 
4019a8e7753SAndreas Gruenbacher */ 402db830c46SAndreas Gruenbacher peer_req->block_id = id; 403b411b363SPhilipp Reisner 404db830c46SAndreas Gruenbacher return peer_req; 405b411b363SPhilipp Reisner 40645bb912bSLars Ellenberg fail: 407db830c46SAndreas Gruenbacher mempool_free(peer_req, drbd_ee_mempool); 408b411b363SPhilipp Reisner return NULL; 409b411b363SPhilipp Reisner } 410b411b363SPhilipp Reisner 411b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 412f6ffca9fSAndreas Gruenbacher int is_net) 413b411b363SPhilipp Reisner { 41421ae5d7fSLars Ellenberg might_sleep(); 415db830c46SAndreas Gruenbacher if (peer_req->flags & EE_HAS_DIGEST) 416db830c46SAndreas Gruenbacher kfree(peer_req->digest); 417b30ab791SAndreas Gruenbacher drbd_free_pages(device, peer_req->pages, is_net); 4180b0ba1efSAndreas Gruenbacher D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 4190b0ba1efSAndreas Gruenbacher D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 42021ae5d7fSLars Ellenberg if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { 42121ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 42221ae5d7fSLars Ellenberg drbd_al_complete_io(device, &peer_req->i); 42321ae5d7fSLars Ellenberg } 424db830c46SAndreas Gruenbacher mempool_free(peer_req, drbd_ee_mempool); 425b411b363SPhilipp Reisner } 426b411b363SPhilipp Reisner 427b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) 428b411b363SPhilipp Reisner { 429b411b363SPhilipp Reisner LIST_HEAD(work_list); 430db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 431b411b363SPhilipp Reisner int count = 0; 432b30ab791SAndreas Gruenbacher int is_net = list == &device->net_ee; 433b411b363SPhilipp Reisner 4340500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 435b411b363SPhilipp Reisner list_splice_init(list, &work_list); 4360500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 437b411b363SPhilipp Reisner 438a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 439b30ab791SAndreas Gruenbacher __drbd_free_peer_req(device, peer_req, is_net); 440b411b363SPhilipp Reisner count++; 441b411b363SPhilipp Reisner } 442b411b363SPhilipp Reisner return count; 443b411b363SPhilipp Reisner } 444b411b363SPhilipp Reisner 445b411b363SPhilipp Reisner /* 446a990be46SAndreas Gruenbacher * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 447b411b363SPhilipp Reisner */ 448b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device) 449b411b363SPhilipp Reisner { 450b411b363SPhilipp Reisner LIST_HEAD(work_list); 451b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 452db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 453e2b3032bSAndreas Gruenbacher int err = 0; 454b411b363SPhilipp Reisner 4550500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 456b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 457b30ab791SAndreas Gruenbacher list_splice_init(&device->done_ee, &work_list); 4580500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 459b411b363SPhilipp Reisner 460a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 461b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 462b411b363SPhilipp Reisner 463b411b363SPhilipp Reisner /* possible callbacks here: 464d4dabbe2SLars Ellenberg * e_end_block, and e_end_resync_block, e_send_superseded. 465b411b363SPhilipp Reisner * all ignore the last argument. 
466b411b363SPhilipp Reisner */ 467a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 468e2b3032bSAndreas Gruenbacher int err2; 469e2b3032bSAndreas Gruenbacher 470b411b363SPhilipp Reisner /* list_del not necessary, next/prev members not touched */ 471a8cd15baSAndreas Gruenbacher err2 = peer_req->w.cb(&peer_req->w, !!err); 472e2b3032bSAndreas Gruenbacher if (!err) 473e2b3032bSAndreas Gruenbacher err = err2; 474b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 475b411b363SPhilipp Reisner } 476b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 477b411b363SPhilipp Reisner 478e2b3032bSAndreas Gruenbacher return err; 479b411b363SPhilipp Reisner } 480b411b363SPhilipp Reisner 481b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device, 482d4da1537SAndreas Gruenbacher struct list_head *head) 483b411b363SPhilipp Reisner { 484b411b363SPhilipp Reisner DEFINE_WAIT(wait); 485b411b363SPhilipp Reisner 486b411b363SPhilipp Reisner /* avoids spin_lock/unlock 487b411b363SPhilipp Reisner * and calling prepare_to_wait in the fast path */ 488b411b363SPhilipp Reisner while (!list_empty(head)) { 489b30ab791SAndreas Gruenbacher prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 4900500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4917eaceaccSJens Axboe io_schedule(); 492b30ab791SAndreas Gruenbacher finish_wait(&device->ee_wait, &wait); 4930500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 494b411b363SPhilipp Reisner } 495b411b363SPhilipp Reisner } 496b411b363SPhilipp Reisner 497b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device, 498d4da1537SAndreas Gruenbacher struct list_head *head) 499b411b363SPhilipp Reisner { 5000500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 501b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, head); 5020500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 503b411b363SPhilipp Reisner } 504b411b363SPhilipp Reisner 505dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 506b411b363SPhilipp Reisner { 507b411b363SPhilipp Reisner struct kvec iov = { 508b411b363SPhilipp Reisner .iov_base = buf, 509b411b363SPhilipp Reisner .iov_len = size, 510b411b363SPhilipp Reisner }; 511b411b363SPhilipp Reisner struct msghdr msg = { 512b411b363SPhilipp Reisner .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 513b411b363SPhilipp Reisner }; 514f730c848SAl Viro return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags); 515b411b363SPhilipp Reisner } 516b411b363SPhilipp Reisner 517bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) 518b411b363SPhilipp Reisner { 519b411b363SPhilipp Reisner int rv; 520b411b363SPhilipp Reisner 521bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->data.socket, buf, size, 0); 522b411b363SPhilipp Reisner 523b411b363SPhilipp Reisner if (rv < 0) { 524b411b363SPhilipp Reisner if (rv == -ECONNRESET) 5251ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was reset by peer\n"); 526b411b363SPhilipp Reisner else if (rv != -ERESTARTSYS) 5271ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 528b411b363SPhilipp Reisner } else if (rv == 0) { 529bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 530b66623e3SPhilipp Reisner long t; 531b66623e3SPhilipp Reisner rcu_read_lock(); 532bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 533b66623e3SPhilipp Reisner rcu_read_unlock(); 534b66623e3SPhilipp Reisner 535bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); 536b66623e3SPhilipp Reisner 537599377acSPhilipp Reisner if (t) 538599377acSPhilipp Reisner goto out; 
539599377acSPhilipp Reisner } 5401ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was shut down by peer\n"); 541599377acSPhilipp Reisner } 542599377acSPhilipp Reisner 543b411b363SPhilipp Reisner if (rv != size) 544bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 545b411b363SPhilipp Reisner 546599377acSPhilipp Reisner out: 547b411b363SPhilipp Reisner return rv; 548b411b363SPhilipp Reisner } 549b411b363SPhilipp Reisner 550bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) 551c6967746SAndreas Gruenbacher { 552c6967746SAndreas Gruenbacher int err; 553c6967746SAndreas Gruenbacher 554bde89a9eSAndreas Gruenbacher err = drbd_recv(connection, buf, size); 555c6967746SAndreas Gruenbacher if (err != size) { 556c6967746SAndreas Gruenbacher if (err >= 0) 557c6967746SAndreas Gruenbacher err = -EIO; 558c6967746SAndreas Gruenbacher } else 559c6967746SAndreas Gruenbacher err = 0; 560c6967746SAndreas Gruenbacher return err; 561c6967746SAndreas Gruenbacher } 562c6967746SAndreas Gruenbacher 563bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) 564a5c31904SAndreas Gruenbacher { 565a5c31904SAndreas Gruenbacher int err; 566a5c31904SAndreas Gruenbacher 567bde89a9eSAndreas Gruenbacher err = drbd_recv_all(connection, buf, size); 568a5c31904SAndreas Gruenbacher if (err && !signal_pending(current)) 5691ec861ebSAndreas Gruenbacher drbd_warn(connection, "short read (expected size %d)\n", (int)size); 570a5c31904SAndreas Gruenbacher return err; 571a5c31904SAndreas Gruenbacher } 572a5c31904SAndreas Gruenbacher 5735dbf1673SLars Ellenberg /* quoting tcp(7): 5745dbf1673SLars Ellenberg * On individual connections, the socket buffer size must be set prior to the 5755dbf1673SLars Ellenberg * listen(2) or connect(2) calls in order to have it take effect. 5765dbf1673SLars Ellenberg * This is our wrapper to do so. 
5775dbf1673SLars Ellenberg */ 5785dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd, 5795dbf1673SLars Ellenberg unsigned int rcv) 5805dbf1673SLars Ellenberg { 5815dbf1673SLars Ellenberg /* open coded SO_SNDBUF, SO_RCVBUF */ 5825dbf1673SLars Ellenberg if (snd) { 5835dbf1673SLars Ellenberg sock->sk->sk_sndbuf = snd; 5845dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5855dbf1673SLars Ellenberg } 5865dbf1673SLars Ellenberg if (rcv) { 5875dbf1673SLars Ellenberg sock->sk->sk_rcvbuf = rcv; 5885dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5895dbf1673SLars Ellenberg } 5905dbf1673SLars Ellenberg } 5915dbf1673SLars Ellenberg 592bde89a9eSAndreas Gruenbacher static struct socket *drbd_try_connect(struct drbd_connection *connection) 593b411b363SPhilipp Reisner { 594b411b363SPhilipp Reisner const char *what; 595b411b363SPhilipp Reisner struct socket *sock; 596b411b363SPhilipp Reisner struct sockaddr_in6 src_in6; 59744ed167dSPhilipp Reisner struct sockaddr_in6 peer_in6; 59844ed167dSPhilipp Reisner struct net_conf *nc; 59944ed167dSPhilipp Reisner int err, peer_addr_len, my_addr_len; 60069ef82deSAndreas Gruenbacher int sndbuf_size, rcvbuf_size, connect_int; 601b411b363SPhilipp Reisner int disconnect_on_error = 1; 602b411b363SPhilipp Reisner 60344ed167dSPhilipp Reisner rcu_read_lock(); 604bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 60544ed167dSPhilipp Reisner if (!nc) { 60644ed167dSPhilipp Reisner rcu_read_unlock(); 607b411b363SPhilipp Reisner return NULL; 60844ed167dSPhilipp Reisner } 60944ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 61044ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 61169ef82deSAndreas Gruenbacher connect_int = nc->connect_int; 612089c075dSAndreas Gruenbacher rcu_read_unlock(); 61344ed167dSPhilipp Reisner 614bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6)); 615bde89a9eSAndreas Gruenbacher 
memcpy(&src_in6, &connection->my_addr, my_addr_len); 61644ed167dSPhilipp Reisner 617bde89a9eSAndreas Gruenbacher if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) 61844ed167dSPhilipp Reisner src_in6.sin6_port = 0; 61944ed167dSPhilipp Reisner else 62044ed167dSPhilipp Reisner ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ 62144ed167dSPhilipp Reisner 622bde89a9eSAndreas Gruenbacher peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); 623bde89a9eSAndreas Gruenbacher memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); 624b411b363SPhilipp Reisner 625b411b363SPhilipp Reisner what = "sock_create_kern"; 626eeb1bd5cSEric W. Biederman err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, 627b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &sock); 628b411b363SPhilipp Reisner if (err < 0) { 629b411b363SPhilipp Reisner sock = NULL; 630b411b363SPhilipp Reisner goto out; 631b411b363SPhilipp Reisner } 632b411b363SPhilipp Reisner 633b411b363SPhilipp Reisner sock->sk->sk_rcvtimeo = 63469ef82deSAndreas Gruenbacher sock->sk->sk_sndtimeo = connect_int * HZ; 63544ed167dSPhilipp Reisner drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); 636b411b363SPhilipp Reisner 637b411b363SPhilipp Reisner /* explicitly bind to the configured IP as source IP 638b411b363SPhilipp Reisner * for the outgoing connections. 639b411b363SPhilipp Reisner * This is needed for multihomed hosts and to be 640b411b363SPhilipp Reisner * able to use lo: interfaces for drbd. 641b411b363SPhilipp Reisner * Make sure to use 0 as port number, so linux selects 642b411b363SPhilipp Reisner * a free one dynamically. 
643b411b363SPhilipp Reisner */ 644b411b363SPhilipp Reisner what = "bind before connect"; 64544ed167dSPhilipp Reisner err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); 646b411b363SPhilipp Reisner if (err < 0) 647b411b363SPhilipp Reisner goto out; 648b411b363SPhilipp Reisner 649b411b363SPhilipp Reisner /* connect may fail, peer not yet available. 650b411b363SPhilipp Reisner * stay C_WF_CONNECTION, don't go Disconnecting! */ 651b411b363SPhilipp Reisner disconnect_on_error = 0; 652b411b363SPhilipp Reisner what = "connect"; 65344ed167dSPhilipp Reisner err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); 654b411b363SPhilipp Reisner 655b411b363SPhilipp Reisner out: 656b411b363SPhilipp Reisner if (err < 0) { 657b411b363SPhilipp Reisner if (sock) { 658b411b363SPhilipp Reisner sock_release(sock); 659b411b363SPhilipp Reisner sock = NULL; 660b411b363SPhilipp Reisner } 661b411b363SPhilipp Reisner switch (-err) { 662b411b363SPhilipp Reisner /* timeout, busy, signal pending */ 663b411b363SPhilipp Reisner case ETIMEDOUT: case EAGAIN: case EINPROGRESS: 664b411b363SPhilipp Reisner case EINTR: case ERESTARTSYS: 665b411b363SPhilipp Reisner /* peer not (yet) available, network problem */ 666b411b363SPhilipp Reisner case ECONNREFUSED: case ENETUNREACH: 667b411b363SPhilipp Reisner case EHOSTDOWN: case EHOSTUNREACH: 668b411b363SPhilipp Reisner disconnect_on_error = 0; 669b411b363SPhilipp Reisner break; 670b411b363SPhilipp Reisner default: 6711ec861ebSAndreas Gruenbacher drbd_err(connection, "%s failed, err = %d\n", what, err); 672b411b363SPhilipp Reisner } 673b411b363SPhilipp Reisner if (disconnect_on_error) 674bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 675b411b363SPhilipp Reisner } 67644ed167dSPhilipp Reisner 677b411b363SPhilipp Reisner return sock; 678b411b363SPhilipp Reisner } 679b411b363SPhilipp Reisner 6807a426fd8SPhilipp Reisner struct accept_wait_data { 681bde89a9eSAndreas 
Gruenbacher struct drbd_connection *connection; 6827a426fd8SPhilipp Reisner struct socket *s_listen; 6837a426fd8SPhilipp Reisner struct completion door_bell; 6847a426fd8SPhilipp Reisner void (*original_sk_state_change)(struct sock *sk); 6857a426fd8SPhilipp Reisner 6867a426fd8SPhilipp Reisner }; 6877a426fd8SPhilipp Reisner 688715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk) 689b411b363SPhilipp Reisner { 6907a426fd8SPhilipp Reisner struct accept_wait_data *ad = sk->sk_user_data; 691715306f6SAndreas Gruenbacher void (*state_change)(struct sock *sk); 6927a426fd8SPhilipp Reisner 693715306f6SAndreas Gruenbacher state_change = ad->original_sk_state_change; 694715306f6SAndreas Gruenbacher if (sk->sk_state == TCP_ESTABLISHED) 6957a426fd8SPhilipp Reisner complete(&ad->door_bell); 696715306f6SAndreas Gruenbacher state_change(sk); 6977a426fd8SPhilipp Reisner } 6987a426fd8SPhilipp Reisner 699bde89a9eSAndreas Gruenbacher static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) 700b411b363SPhilipp Reisner { 7011f3e509bSPhilipp Reisner int err, sndbuf_size, rcvbuf_size, my_addr_len; 70244ed167dSPhilipp Reisner struct sockaddr_in6 my_addr; 7031f3e509bSPhilipp Reisner struct socket *s_listen; 70444ed167dSPhilipp Reisner struct net_conf *nc; 705b411b363SPhilipp Reisner const char *what; 706b411b363SPhilipp Reisner 70744ed167dSPhilipp Reisner rcu_read_lock(); 708bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 70944ed167dSPhilipp Reisner if (!nc) { 71044ed167dSPhilipp Reisner rcu_read_unlock(); 7117a426fd8SPhilipp Reisner return -EIO; 71244ed167dSPhilipp Reisner } 71344ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 71444ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 71544ed167dSPhilipp Reisner rcu_read_unlock(); 716b411b363SPhilipp Reisner 717bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); 718bde89a9eSAndreas 
Gruenbacher memcpy(&my_addr, &connection->my_addr, my_addr_len); 719b411b363SPhilipp Reisner 720b411b363SPhilipp Reisner what = "sock_create_kern"; 721eeb1bd5cSEric W. Biederman err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, 722b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &s_listen); 723b411b363SPhilipp Reisner if (err) { 724b411b363SPhilipp Reisner s_listen = NULL; 725b411b363SPhilipp Reisner goto out; 726b411b363SPhilipp Reisner } 727b411b363SPhilipp Reisner 7284a17fd52SPavel Emelyanov s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 72944ed167dSPhilipp Reisner drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); 730b411b363SPhilipp Reisner 731b411b363SPhilipp Reisner what = "bind before listen"; 73244ed167dSPhilipp Reisner err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); 733b411b363SPhilipp Reisner if (err < 0) 734b411b363SPhilipp Reisner goto out; 735b411b363SPhilipp Reisner 7367a426fd8SPhilipp Reisner ad->s_listen = s_listen; 7377a426fd8SPhilipp Reisner write_lock_bh(&s_listen->sk->sk_callback_lock); 7387a426fd8SPhilipp Reisner ad->original_sk_state_change = s_listen->sk->sk_state_change; 739715306f6SAndreas Gruenbacher s_listen->sk->sk_state_change = drbd_incoming_connection; 7407a426fd8SPhilipp Reisner s_listen->sk->sk_user_data = ad; 7417a426fd8SPhilipp Reisner write_unlock_bh(&s_listen->sk->sk_callback_lock); 742b411b363SPhilipp Reisner 7432820fd39SPhilipp Reisner what = "listen"; 7442820fd39SPhilipp Reisner err = s_listen->ops->listen(s_listen, 5); 7452820fd39SPhilipp Reisner if (err < 0) 7462820fd39SPhilipp Reisner goto out; 7472820fd39SPhilipp Reisner 7487a426fd8SPhilipp Reisner return 0; 749b411b363SPhilipp Reisner out: 750b411b363SPhilipp Reisner if (s_listen) 751b411b363SPhilipp Reisner sock_release(s_listen); 752b411b363SPhilipp Reisner if (err < 0) { 753b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7541ec861ebSAndreas Gruenbacher 
drbd_err(connection, "%s failed, err = %d\n", what, err); 755bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 756b411b363SPhilipp Reisner } 757b411b363SPhilipp Reisner } 7581f3e509bSPhilipp Reisner 7597a426fd8SPhilipp Reisner return -EIO; 7601f3e509bSPhilipp Reisner } 7611f3e509bSPhilipp Reisner 762715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) 763715306f6SAndreas Gruenbacher { 764715306f6SAndreas Gruenbacher write_lock_bh(&sk->sk_callback_lock); 765715306f6SAndreas Gruenbacher sk->sk_state_change = ad->original_sk_state_change; 766715306f6SAndreas Gruenbacher sk->sk_user_data = NULL; 767715306f6SAndreas Gruenbacher write_unlock_bh(&sk->sk_callback_lock); 768715306f6SAndreas Gruenbacher } 769715306f6SAndreas Gruenbacher 770bde89a9eSAndreas Gruenbacher static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) 7711f3e509bSPhilipp Reisner { 7721f3e509bSPhilipp Reisner int timeo, connect_int, err = 0; 7731f3e509bSPhilipp Reisner struct socket *s_estab = NULL; 7741f3e509bSPhilipp Reisner struct net_conf *nc; 7751f3e509bSPhilipp Reisner 7761f3e509bSPhilipp Reisner rcu_read_lock(); 777bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 7781f3e509bSPhilipp Reisner if (!nc) { 7791f3e509bSPhilipp Reisner rcu_read_unlock(); 7801f3e509bSPhilipp Reisner return NULL; 7811f3e509bSPhilipp Reisner } 7821f3e509bSPhilipp Reisner connect_int = nc->connect_int; 7831f3e509bSPhilipp Reisner rcu_read_unlock(); 7841f3e509bSPhilipp Reisner 7851f3e509bSPhilipp Reisner timeo = connect_int * HZ; 78638b682b2SAkinobu Mita /* 28.5% random jitter */ 78738b682b2SAkinobu Mita timeo += (prandom_u32() & 1) ? 
timeo / 7 : -timeo / 7; 7881f3e509bSPhilipp Reisner 7897a426fd8SPhilipp Reisner err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); 7907a426fd8SPhilipp Reisner if (err <= 0) 7917a426fd8SPhilipp Reisner return NULL; 7921f3e509bSPhilipp Reisner 7937a426fd8SPhilipp Reisner err = kernel_accept(ad->s_listen, &s_estab, 0); 794b411b363SPhilipp Reisner if (err < 0) { 795b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7961ec861ebSAndreas Gruenbacher drbd_err(connection, "accept failed, err = %d\n", err); 797bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 798b411b363SPhilipp Reisner } 799b411b363SPhilipp Reisner } 800b411b363SPhilipp Reisner 801715306f6SAndreas Gruenbacher if (s_estab) 802715306f6SAndreas Gruenbacher unregister_state_change(s_estab->sk, ad); 803b411b363SPhilipp Reisner 804b411b363SPhilipp Reisner return s_estab; 805b411b363SPhilipp Reisner } 806b411b363SPhilipp Reisner 807bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *); 808b411b363SPhilipp Reisner 809bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, 8109f5bdc33SAndreas Gruenbacher enum drbd_packet cmd) 8119f5bdc33SAndreas Gruenbacher { 812bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) 8139f5bdc33SAndreas Gruenbacher return -EIO; 814bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, cmd, 0, NULL, 0); 815b411b363SPhilipp Reisner } 816b411b363SPhilipp Reisner 817bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock) 818b411b363SPhilipp Reisner { 819bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 8209f5bdc33SAndreas Gruenbacher struct packet_info pi; 8214920e37aSPhilipp Reisner struct net_conf *nc; 8229f5bdc33SAndreas Gruenbacher 
int err; 823b411b363SPhilipp Reisner 8244920e37aSPhilipp Reisner rcu_read_lock(); 8254920e37aSPhilipp Reisner nc = rcu_dereference(connection->net_conf); 8264920e37aSPhilipp Reisner if (!nc) { 8274920e37aSPhilipp Reisner rcu_read_unlock(); 8284920e37aSPhilipp Reisner return -EIO; 8294920e37aSPhilipp Reisner } 8304920e37aSPhilipp Reisner sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; 8314920e37aSPhilipp Reisner rcu_read_unlock(); 8324920e37aSPhilipp Reisner 833bde89a9eSAndreas Gruenbacher err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 8349f5bdc33SAndreas Gruenbacher if (err != header_size) { 8359f5bdc33SAndreas Gruenbacher if (err >= 0) 8369f5bdc33SAndreas Gruenbacher err = -EIO; 8379f5bdc33SAndreas Gruenbacher return err; 8389f5bdc33SAndreas Gruenbacher } 839bde89a9eSAndreas Gruenbacher err = decode_header(connection, connection->data.rbuf, &pi); 8409f5bdc33SAndreas Gruenbacher if (err) 8419f5bdc33SAndreas Gruenbacher return err; 8429f5bdc33SAndreas Gruenbacher return pi.cmd; 843b411b363SPhilipp Reisner } 844b411b363SPhilipp Reisner 845b411b363SPhilipp Reisner /** 846b411b363SPhilipp Reisner * drbd_socket_okay() - Free the socket if its connection is not okay 847b411b363SPhilipp Reisner * @sock: pointer to the pointer to the socket. 
848b411b363SPhilipp Reisner */ 8495d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock) 850b411b363SPhilipp Reisner { 851b411b363SPhilipp Reisner int rr; 852b411b363SPhilipp Reisner char tb[4]; 853b411b363SPhilipp Reisner 854b411b363SPhilipp Reisner if (!*sock) 85581e84650SAndreas Gruenbacher return false; 856b411b363SPhilipp Reisner 857dbd9eea0SPhilipp Reisner rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 858b411b363SPhilipp Reisner 859b411b363SPhilipp Reisner if (rr > 0 || rr == -EAGAIN) { 86081e84650SAndreas Gruenbacher return true; 861b411b363SPhilipp Reisner } else { 862b411b363SPhilipp Reisner sock_release(*sock); 863b411b363SPhilipp Reisner *sock = NULL; 86481e84650SAndreas Gruenbacher return false; 865b411b363SPhilipp Reisner } 866b411b363SPhilipp Reisner } 8675d0b17f1SPhilipp Reisner 8685d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection, 8695d0b17f1SPhilipp Reisner struct socket **sock1, 8705d0b17f1SPhilipp Reisner struct socket **sock2) 8715d0b17f1SPhilipp Reisner { 8725d0b17f1SPhilipp Reisner struct net_conf *nc; 8735d0b17f1SPhilipp Reisner int timeout; 8745d0b17f1SPhilipp Reisner bool ok; 8755d0b17f1SPhilipp Reisner 8765d0b17f1SPhilipp Reisner if (!*sock1 || !*sock2) 8775d0b17f1SPhilipp Reisner return false; 8785d0b17f1SPhilipp Reisner 8795d0b17f1SPhilipp Reisner rcu_read_lock(); 8805d0b17f1SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 8815d0b17f1SPhilipp Reisner timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; 8825d0b17f1SPhilipp Reisner rcu_read_unlock(); 8835d0b17f1SPhilipp Reisner schedule_timeout_interruptible(timeout); 8845d0b17f1SPhilipp Reisner 8855d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock1); 8865d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock2) && ok; 8875d0b17f1SPhilipp Reisner 8885d0b17f1SPhilipp Reisner return ok; 8895d0b17f1SPhilipp Reisner } 8905d0b17f1SPhilipp Reisner 8912325eb66SPhilipp Reisner /* Gets called if a 
connection is established, or if a new minor gets created 8922325eb66SPhilipp Reisner in a connection */ 89369a22773SAndreas Gruenbacher int drbd_connected(struct drbd_peer_device *peer_device) 894907599e0SPhilipp Reisner { 89569a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 8960829f5edSAndreas Gruenbacher int err; 897907599e0SPhilipp Reisner 898b30ab791SAndreas Gruenbacher atomic_set(&device->packet_seq, 0); 899b30ab791SAndreas Gruenbacher device->peer_seq = 0; 900907599e0SPhilipp Reisner 90169a22773SAndreas Gruenbacher device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 90269a22773SAndreas Gruenbacher &peer_device->connection->cstate_mutex : 903b30ab791SAndreas Gruenbacher &device->own_state_mutex; 9048410da8fSPhilipp Reisner 90569a22773SAndreas Gruenbacher err = drbd_send_sync_param(peer_device); 9060829f5edSAndreas Gruenbacher if (!err) 90769a22773SAndreas Gruenbacher err = drbd_send_sizes(peer_device, 0, 0); 9080829f5edSAndreas Gruenbacher if (!err) 90969a22773SAndreas Gruenbacher err = drbd_send_uuids(peer_device); 9100829f5edSAndreas Gruenbacher if (!err) 91169a22773SAndreas Gruenbacher err = drbd_send_current_state(peer_device); 912b30ab791SAndreas Gruenbacher clear_bit(USE_DEGR_WFC_T, &device->flags); 913b30ab791SAndreas Gruenbacher clear_bit(RESIZE_PENDING, &device->flags); 914b30ab791SAndreas Gruenbacher atomic_set(&device->ap_in_flight, 0); 915b30ab791SAndreas Gruenbacher mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ 9160829f5edSAndreas Gruenbacher return err; 917907599e0SPhilipp Reisner } 918b411b363SPhilipp Reisner 919b411b363SPhilipp Reisner /* 920b411b363SPhilipp Reisner * return values: 921b411b363SPhilipp Reisner * 1 yes, we have a valid connection 922b411b363SPhilipp Reisner * 0 oops, did not work out, please try again 923b411b363SPhilipp Reisner * -1 peer talks different language, 924b411b363SPhilipp Reisner * no point in trying again, please go standalone. 
925b411b363SPhilipp Reisner * -2 We do not have a network config... 926b411b363SPhilipp Reisner */ 927bde89a9eSAndreas Gruenbacher static int conn_connect(struct drbd_connection *connection) 928b411b363SPhilipp Reisner { 9297da35862SPhilipp Reisner struct drbd_socket sock, msock; 930c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 93144ed167dSPhilipp Reisner struct net_conf *nc; 9325d0b17f1SPhilipp Reisner int vnr, timeout, h; 9335d0b17f1SPhilipp Reisner bool discard_my_data, ok; 934197296ffSPhilipp Reisner enum drbd_state_rv rv; 9357a426fd8SPhilipp Reisner struct accept_wait_data ad = { 936bde89a9eSAndreas Gruenbacher .connection = connection, 9377a426fd8SPhilipp Reisner .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), 9387a426fd8SPhilipp Reisner }; 939b411b363SPhilipp Reisner 940bde89a9eSAndreas Gruenbacher clear_bit(DISCONNECT_SENT, &connection->flags); 941bde89a9eSAndreas Gruenbacher if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) 942b411b363SPhilipp Reisner return -2; 943b411b363SPhilipp Reisner 9447da35862SPhilipp Reisner mutex_init(&sock.mutex); 945bde89a9eSAndreas Gruenbacher sock.sbuf = connection->data.sbuf; 946bde89a9eSAndreas Gruenbacher sock.rbuf = connection->data.rbuf; 9477da35862SPhilipp Reisner sock.socket = NULL; 9487da35862SPhilipp Reisner mutex_init(&msock.mutex); 949bde89a9eSAndreas Gruenbacher msock.sbuf = connection->meta.sbuf; 950bde89a9eSAndreas Gruenbacher msock.rbuf = connection->meta.rbuf; 9517da35862SPhilipp Reisner msock.socket = NULL; 9527da35862SPhilipp Reisner 9530916e0e3SAndreas Gruenbacher /* Assume that the peer only understands protocol 80 until we know better. 
*/ 954bde89a9eSAndreas Gruenbacher connection->agreed_pro_version = 80; 955b411b363SPhilipp Reisner 956bde89a9eSAndreas Gruenbacher if (prepare_listen_socket(connection, &ad)) 9577a426fd8SPhilipp Reisner return 0; 958b411b363SPhilipp Reisner 959b411b363SPhilipp Reisner do { 9602bf89621SAndreas Gruenbacher struct socket *s; 961b411b363SPhilipp Reisner 962bde89a9eSAndreas Gruenbacher s = drbd_try_connect(connection); 963b411b363SPhilipp Reisner if (s) { 9647da35862SPhilipp Reisner if (!sock.socket) { 9657da35862SPhilipp Reisner sock.socket = s; 966bde89a9eSAndreas Gruenbacher send_first_packet(connection, &sock, P_INITIAL_DATA); 9677da35862SPhilipp Reisner } else if (!msock.socket) { 968bde89a9eSAndreas Gruenbacher clear_bit(RESOLVE_CONFLICTS, &connection->flags); 9697da35862SPhilipp Reisner msock.socket = s; 970bde89a9eSAndreas Gruenbacher send_first_packet(connection, &msock, P_INITIAL_META); 971b411b363SPhilipp Reisner } else { 9721ec861ebSAndreas Gruenbacher drbd_err(connection, "Logic error in conn_connect()\n"); 973b411b363SPhilipp Reisner goto out_release_sockets; 974b411b363SPhilipp Reisner } 975b411b363SPhilipp Reisner } 976b411b363SPhilipp Reisner 9775d0b17f1SPhilipp Reisner if (connection_established(connection, &sock.socket, &msock.socket)) 978b411b363SPhilipp Reisner break; 979b411b363SPhilipp Reisner 980b411b363SPhilipp Reisner retry: 981bde89a9eSAndreas Gruenbacher s = drbd_wait_for_connect(connection, &ad); 982b411b363SPhilipp Reisner if (s) { 983bde89a9eSAndreas Gruenbacher int fp = receive_first_packet(connection, s); 9847da35862SPhilipp Reisner drbd_socket_okay(&sock.socket); 9857da35862SPhilipp Reisner drbd_socket_okay(&msock.socket); 98692f14951SPhilipp Reisner switch (fp) { 987e5d6f33aSAndreas Gruenbacher case P_INITIAL_DATA: 9887da35862SPhilipp Reisner if (sock.socket) { 9891ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet S crossed\n"); 9907da35862SPhilipp Reisner sock_release(sock.socket); 99180c6eed4SPhilipp Reisner 
sock.socket = s; 99280c6eed4SPhilipp Reisner goto randomize; 993b411b363SPhilipp Reisner } 9947da35862SPhilipp Reisner sock.socket = s; 995b411b363SPhilipp Reisner break; 996e5d6f33aSAndreas Gruenbacher case P_INITIAL_META: 997bde89a9eSAndreas Gruenbacher set_bit(RESOLVE_CONFLICTS, &connection->flags); 9987da35862SPhilipp Reisner if (msock.socket) { 9991ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet M crossed\n"); 10007da35862SPhilipp Reisner sock_release(msock.socket); 100180c6eed4SPhilipp Reisner msock.socket = s; 100280c6eed4SPhilipp Reisner goto randomize; 1003b411b363SPhilipp Reisner } 10047da35862SPhilipp Reisner msock.socket = s; 1005b411b363SPhilipp Reisner break; 1006b411b363SPhilipp Reisner default: 10071ec861ebSAndreas Gruenbacher drbd_warn(connection, "Error receiving initial packet\n"); 1008b411b363SPhilipp Reisner sock_release(s); 100980c6eed4SPhilipp Reisner randomize: 101038b682b2SAkinobu Mita if (prandom_u32() & 1) 1011b411b363SPhilipp Reisner goto retry; 1012b411b363SPhilipp Reisner } 1013b411b363SPhilipp Reisner } 1014b411b363SPhilipp Reisner 1015bde89a9eSAndreas Gruenbacher if (connection->cstate <= C_DISCONNECTING) 1016b411b363SPhilipp Reisner goto out_release_sockets; 1017b411b363SPhilipp Reisner if (signal_pending(current)) { 1018b411b363SPhilipp Reisner flush_signals(current); 1019b411b363SPhilipp Reisner smp_rmb(); 1020bde89a9eSAndreas Gruenbacher if (get_t_state(&connection->receiver) == EXITING) 1021b411b363SPhilipp Reisner goto out_release_sockets; 1022b411b363SPhilipp Reisner } 1023b411b363SPhilipp Reisner 10245d0b17f1SPhilipp Reisner ok = connection_established(connection, &sock.socket, &msock.socket); 1025b666dbf8SPhilipp Reisner } while (!ok); 1026b411b363SPhilipp Reisner 10277a426fd8SPhilipp Reisner if (ad.s_listen) 10287a426fd8SPhilipp Reisner sock_release(ad.s_listen); 1029b411b363SPhilipp Reisner 103098683650SPhilipp Reisner sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 103198683650SPhilipp 
Reisner msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 1032b411b363SPhilipp Reisner 10337da35862SPhilipp Reisner sock.socket->sk->sk_allocation = GFP_NOIO; 10347da35862SPhilipp Reisner msock.socket->sk->sk_allocation = GFP_NOIO; 1035b411b363SPhilipp Reisner 10367da35862SPhilipp Reisner sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 10377da35862SPhilipp Reisner msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; 1038b411b363SPhilipp Reisner 1039b411b363SPhilipp Reisner /* NOT YET ... 1040bde89a9eSAndreas Gruenbacher * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10; 10417da35862SPhilipp Reisner * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 10426038178eSAndreas Gruenbacher * first set it to the P_CONNECTION_FEATURES timeout, 1043b411b363SPhilipp Reisner * which we set to 4x the configured ping_timeout. */ 104444ed167dSPhilipp Reisner rcu_read_lock(); 1045bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 1046b411b363SPhilipp Reisner 10477da35862SPhilipp Reisner sock.socket->sk->sk_sndtimeo = 10487da35862SPhilipp Reisner sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; 104944ed167dSPhilipp Reisner 10507da35862SPhilipp Reisner msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; 105144ed167dSPhilipp Reisner timeout = nc->timeout * HZ / 10; 105208b165baSPhilipp Reisner discard_my_data = nc->discard_my_data; 105344ed167dSPhilipp Reisner rcu_read_unlock(); 105444ed167dSPhilipp Reisner 10557da35862SPhilipp Reisner msock.socket->sk->sk_sndtimeo = timeout; 1056b411b363SPhilipp Reisner 1057b411b363SPhilipp Reisner /* we don't want delays. 
105825985edcSLucas De Marchi * we use TCP_CORK where appropriate, though */ 10597da35862SPhilipp Reisner drbd_tcp_nodelay(sock.socket); 10607da35862SPhilipp Reisner drbd_tcp_nodelay(msock.socket); 1061b411b363SPhilipp Reisner 1062bde89a9eSAndreas Gruenbacher connection->data.socket = sock.socket; 1063bde89a9eSAndreas Gruenbacher connection->meta.socket = msock.socket; 1064bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 1065b411b363SPhilipp Reisner 1066bde89a9eSAndreas Gruenbacher h = drbd_do_features(connection); 1067b411b363SPhilipp Reisner if (h <= 0) 1068b411b363SPhilipp Reisner return h; 1069b411b363SPhilipp Reisner 1070bde89a9eSAndreas Gruenbacher if (connection->cram_hmac_tfm) { 1071b30ab791SAndreas Gruenbacher /* drbd_request_state(device, NS(conn, WFAuth)); */ 1072bde89a9eSAndreas Gruenbacher switch (drbd_do_auth(connection)) { 1073b10d96cbSJohannes Thoma case -1: 10741ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed\n"); 1075b411b363SPhilipp Reisner return -1; 1076b10d96cbSJohannes Thoma case 0: 10771ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed, trying again.\n"); 1078b10d96cbSJohannes Thoma return 0; 1079b411b363SPhilipp Reisner } 1080b411b363SPhilipp Reisner } 1081b411b363SPhilipp Reisner 1082bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_sndtimeo = timeout; 1083bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1084b411b363SPhilipp Reisner 1085bde89a9eSAndreas Gruenbacher if (drbd_send_protocol(connection) == -EOPNOTSUPP) 10867e2455c1SPhilipp Reisner return -1; 10871e86ac48SPhilipp Reisner 108813c76abaSPhilipp Reisner /* Prevent a race between resync-handshake and 108913c76abaSPhilipp Reisner * being promoted to Primary. 
109013c76abaSPhilipp Reisner * 109113c76abaSPhilipp Reisner * Grab and release the state mutex, so we know that any current 109213c76abaSPhilipp Reisner * drbd_set_role() is finished, and any incoming drbd_set_role 109313c76abaSPhilipp Reisner * will see the STATE_SENT flag, and wait for it to be cleared. 109413c76abaSPhilipp Reisner */ 109531007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 109631007745SPhilipp Reisner mutex_lock(peer_device->device->state_mutex); 109731007745SPhilipp Reisner 109831007745SPhilipp Reisner set_bit(STATE_SENT, &connection->flags); 109931007745SPhilipp Reisner 110031007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 110131007745SPhilipp Reisner mutex_unlock(peer_device->device->state_mutex); 110231007745SPhilipp Reisner 110331007745SPhilipp Reisner rcu_read_lock(); 110431007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 110531007745SPhilipp Reisner struct drbd_device *device = peer_device->device; 110631007745SPhilipp Reisner kref_get(&device->kref); 110731007745SPhilipp Reisner rcu_read_unlock(); 110813c76abaSPhilipp Reisner 110908b165baSPhilipp Reisner if (discard_my_data) 1110b30ab791SAndreas Gruenbacher set_bit(DISCARD_MY_DATA, &device->flags); 111108b165baSPhilipp Reisner else 1112b30ab791SAndreas Gruenbacher clear_bit(DISCARD_MY_DATA, &device->flags); 111308b165baSPhilipp Reisner 111469a22773SAndreas Gruenbacher drbd_connected(peer_device); 111505a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 1116c141ebdaSPhilipp Reisner rcu_read_lock(); 1117c141ebdaSPhilipp Reisner } 1118c141ebdaSPhilipp Reisner rcu_read_unlock(); 1119c141ebdaSPhilipp Reisner 1120bde89a9eSAndreas Gruenbacher rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); 1121bde89a9eSAndreas Gruenbacher if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) { 1122bde89a9eSAndreas Gruenbacher 
clear_bit(STATE_SENT, &connection->flags); 11231e86ac48SPhilipp Reisner return 0; 1124a1096a6eSPhilipp Reisner } 11251e86ac48SPhilipp Reisner 11261c03e520SPhilipp Reisner drbd_thread_start(&connection->ack_receiver); 1127668700b4SPhilipp Reisner connection->ack_sender = create_singlethread_workqueue("drbd_ack_sender"); 1128668700b4SPhilipp Reisner if (!connection->ack_sender) { 1129668700b4SPhilipp Reisner drbd_err(connection, "Failed to create workqueue ack_sender\n"); 1130668700b4SPhilipp Reisner return 0; 1131668700b4SPhilipp Reisner } 1132b411b363SPhilipp Reisner 11330500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 113408b165baSPhilipp Reisner /* The discard_my_data flag is a single-shot modifier to the next 113508b165baSPhilipp Reisner * connection attempt, the handshake of which is now well underway. 113608b165baSPhilipp Reisner * No need for rcu style copying of the whole struct 113708b165baSPhilipp Reisner * just to clear a single value. */ 1138bde89a9eSAndreas Gruenbacher connection->net_conf->discard_my_data = 0; 11390500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 114008b165baSPhilipp Reisner 1141d3fcb490SPhilipp Reisner return h; 1142b411b363SPhilipp Reisner 1143b411b363SPhilipp Reisner out_release_sockets: 11447a426fd8SPhilipp Reisner if (ad.s_listen) 11457a426fd8SPhilipp Reisner sock_release(ad.s_listen); 11467da35862SPhilipp Reisner if (sock.socket) 11477da35862SPhilipp Reisner sock_release(sock.socket); 11487da35862SPhilipp Reisner if (msock.socket) 11497da35862SPhilipp Reisner sock_release(msock.socket); 1150b411b363SPhilipp Reisner return -1; 1151b411b363SPhilipp Reisner } 1152b411b363SPhilipp Reisner 1153bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi) 1154b411b363SPhilipp Reisner { 1155bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 1156b411b363SPhilipp Reisner 
11570c8e36d9SAndreas Gruenbacher if (header_size == sizeof(struct p_header100) && 11580c8e36d9SAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { 11590c8e36d9SAndreas Gruenbacher struct p_header100 *h = header; 11600c8e36d9SAndreas Gruenbacher if (h->pad != 0) { 11611ec861ebSAndreas Gruenbacher drbd_err(connection, "Header padding is not zero\n"); 11620c8e36d9SAndreas Gruenbacher return -EINVAL; 116302918be2SPhilipp Reisner } 11640c8e36d9SAndreas Gruenbacher pi->vnr = be16_to_cpu(h->volume); 11650c8e36d9SAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 11660c8e36d9SAndreas Gruenbacher pi->size = be32_to_cpu(h->length); 11670c8e36d9SAndreas Gruenbacher } else if (header_size == sizeof(struct p_header95) && 1168e658983aSAndreas Gruenbacher *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { 1169e658983aSAndreas Gruenbacher struct p_header95 *h = header; 1170e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1171b55d84baSAndreas Gruenbacher pi->size = be32_to_cpu(h->length); 1172eefc2f7dSPhilipp Reisner pi->vnr = 0; 1173e658983aSAndreas Gruenbacher } else if (header_size == sizeof(struct p_header80) && 1174e658983aSAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { 1175e658983aSAndreas Gruenbacher struct p_header80 *h = header; 1176e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1177e658983aSAndreas Gruenbacher pi->size = be16_to_cpu(h->length); 117877351055SPhilipp Reisner pi->vnr = 0; 117902918be2SPhilipp Reisner } else { 11801ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n", 1181e658983aSAndreas Gruenbacher be32_to_cpu(*(__be32 *)header), 1182bde89a9eSAndreas Gruenbacher connection->agreed_pro_version); 11838172f3e9SAndreas Gruenbacher return -EINVAL; 1184b411b363SPhilipp Reisner } 1185e658983aSAndreas Gruenbacher pi->data = header + header_size; 11868172f3e9SAndreas Gruenbacher return 0; 1187b411b363SPhilipp Reisner } 1188b411b363SPhilipp 
/*
 * drbd_recv_header() - receive and decode one packet header from the data socket.
 * @connection:	DRBD connection to read from.
 * @pi:		packet_info to fill in (cmd, size, vnr, payload pointer).
 *
 * Reads exactly drbd_header_size() bytes into the connection's receive
 * buffer, then decodes them.  On every successfully received header,
 * connection->last_received is refreshed with the current jiffies.
 * Returns 0 on success, or the negative error from receive/decode.
 */
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

/*
 * drbd_flush() - flush the backing devices of all volumes of this connection.
 * @connection:	DRBD connection.
 *
 * Only acts if the resource's write ordering is at least WO_BDEV_FLUSH.
 * Iterates all peer devices under the RCU read lock; for each attached
 * device the RCU lock is dropped around the (blocking) flush while a kref
 * keeps the device alive.  A failed flush permanently degrades the write
 * ordering to WO_DRAIN_IO and aborts the loop.
 */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip volumes without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* hold the device across the sleeping flush; RCU alone
			 * would not allow blocking */
			kref_get(&device->kref);
			rcu_read_unlock();

			/* Right now, we have only this one synchronous code path
			 * for flushes between request epochs.
			 * We may want to make those asynchronous,
			 * or at least parallelize the flushes to the volume devices.
			 */
			device->flush_jif = jiffies;
			set_bit(FLUSH_PENDING, &device->flags);
			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			clear_bit(FLUSH_PENDING, &device->flags);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection the epoch belongs to.
 * @epoch:	Epoch object.
 * @ev:		Epoch event (may be or'ed with EV_CLEANUP).
 *
 * An epoch is finished once it has a non-zero size, no active references,
 * and either a barrier number or EV_CLEANUP set.  Finishing a non-current
 * epoch destroys it (FE_DESTROYED) and continues with its list successor;
 * the current epoch is reset for reuse instead (FE_RECYCLED).
 * Returns FE_STILL_LIVE if nothing was finished.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending the barrier ack;
				 * re-read state afterwards in the loop */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* a finished non-current epoch is freed, and we
				 * re-evaluate its successor in the next iteration */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* the current epoch is recycled in place */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
/*
 * max_allowed_wo() - cap a write ordering method by a backing device's config.
 * @bdev:	backing device whose disk_conf limits apply.
 * @wo:		requested write ordering method.
 *
 * Steps @wo down (flush -> drain -> none) as far as the device's
 * disk_flushes/disk_drain settings require.  Caller must hold the RCU
 * read lock (rcu_dereference of disk_conf).
 */
static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
		wo = WO_DRAIN_IO;
	if (wo == WO_DRAIN_IO && !dc->disk_drain)
		wo = WO_NONE;

	return wo;
}

/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	backing device to consider even if not (yet) in @resource's
 *		device idr; may be NULL.
 * @wo:		Write ordering method to try.
 *
 * The effective method is the minimum of the current one, the requested
 * one (unless WO_BDEV_FLUSH, which may raise it again), and what every
 * attached backing device allows.  Logs the method whenever it changes.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already covered by the loop; don't apply twice */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 * @fault_type:	fault injection class passed to drbd_generic_make_request().
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of allocated bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* Trims emulated via zero-out take a synchronous short-cut:
	 * the request is completed here, no bios are built. */
	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, data_size >> 9, GFP_NOIO, false))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submission order below is LIFO */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = data_size;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* bio is full; start a new one for the remaining pages */
			goto next_bio;
		}
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* release every bio allocated so far; none were submitted */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
/*
 * drbd_remove_epoch_entry_interval() - take a peer request out of the
 * write_requests interval tree and wake any waiters on it.
 */
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

/*
 * conn_wait_active_ee_empty() - wait until the active_ee list of every
 * volume of this connection has drained.  The RCU read lock is dropped
 * around the (sleeping) wait; a kref pins each device meanwhile.
 */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

/*
 * receive_Barrier() - handle an incoming P_BARRIER packet.
 *
 * Stamps the current epoch with the peer's barrier number, tries to
 * finish it, and — depending on the write ordering method — flushes or
 * drains before opening a new epoch.  Returns 0 on success, -EIO on an
 * unknown write ordering method.
 */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through: degrade to the drain/flush path below */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* only allocate a fresh epoch if the current one is non-empty */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
/*
 * Receives one data block (or trim request) from the socket into a newly
 * allocated peer request.  Verifies the optional peer integrity digest and
 * that the request fits within the local disk.  Returns NULL on any
 * receive, validation or allocation failure (the peer request is freed).
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		/* payload follows the digest on the wire */
		data_size -= digest_size;
	}

	if (trim) {
		/* trims carry no payload; the affected size is in the packet */
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim)
		return peer_req;

	/* receive the payload page by page into the page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (digest_size) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 * Uses a single scratch page; returns 0 or the receive error.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

/*
 * recv_dless_read() - receive a "disk-less" read reply directly into the
 * master bio of the original request @req, segment by segment, and verify
 * the optional peer integrity digest.  Returns 0 or a negative error.
 */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: marks the range in sync and
 * sends P_RS_WRITE_ACK on success, or records the failed range and sends
 * P_NEG_ACK on error.  Drops the unacked count either way.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}
1804b411b363SPhilipp Reisner } 1805b411b363SPhilipp Reisner 180669a22773SAndreas Gruenbacher static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, 1807a0fb3c47SLars Ellenberg struct packet_info *pi) __releases(local) 1808b411b363SPhilipp Reisner { 180969a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1810db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 1811b411b363SPhilipp Reisner 1812a0fb3c47SLars Ellenberg peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi); 1813db830c46SAndreas Gruenbacher if (!peer_req) 181445bb912bSLars Ellenberg goto fail; 1815b411b363SPhilipp Reisner 1816b30ab791SAndreas Gruenbacher dec_rs_pending(device); 1817b411b363SPhilipp Reisner 1818b30ab791SAndreas Gruenbacher inc_unacked(device); 1819b411b363SPhilipp Reisner /* corresponding dec_unacked() in e_end_resync_block() 1820b411b363SPhilipp Reisner * respective _drbd_clear_done_ee */ 1821b411b363SPhilipp Reisner 1822a8cd15baSAndreas Gruenbacher peer_req->w.cb = e_end_resync_block; 182321ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies; 182445bb912bSLars Ellenberg 18250500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1826b9ed7080SLars Ellenberg list_add_tail(&peer_req->w.list, &device->sync_ee); 18270500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1828b411b363SPhilipp Reisner 1829a0fb3c47SLars Ellenberg atomic_add(pi->size >> 9, &device->rs_sect_ev); 1830b30ab791SAndreas Gruenbacher if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) 1831e1c1b0fcSAndreas Gruenbacher return 0; 183245bb912bSLars Ellenberg 183310f6d992SLars Ellenberg /* don't care for the reason here */ 1834d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 18350500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1836a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 18370500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 183822cc37a9SLars Ellenberg 1839b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 184045bb912bSLars Ellenberg fail: 1841b30ab791SAndreas Gruenbacher put_ldev(device); 1842e1c1b0fcSAndreas Gruenbacher return -EIO; 1843b411b363SPhilipp Reisner } 1844b411b363SPhilipp Reisner 1845668eebc6SAndreas Gruenbacher static struct drbd_request * 1846b30ab791SAndreas Gruenbacher find_request(struct drbd_device *device, struct rb_root *root, u64 id, 1847bc9c5c41SAndreas Gruenbacher sector_t sector, bool missing_ok, const char *func) 1848b411b363SPhilipp Reisner { 1849b411b363SPhilipp Reisner struct drbd_request *req; 1850668eebc6SAndreas Gruenbacher 1851bc9c5c41SAndreas Gruenbacher /* Request object according to our peer */ 1852bc9c5c41SAndreas Gruenbacher req = (struct drbd_request *)(unsigned long)id; 18535e472264SAndreas Gruenbacher if (drbd_contains_interval(root, sector, &req->i) && req->i.local) 1854668eebc6SAndreas Gruenbacher return req; 1855c3afd8f5SAndreas Gruenbacher if (!missing_ok) { 1856d0180171SAndreas Gruenbacher drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func, 1857c3afd8f5SAndreas Gruenbacher (unsigned long)id, (unsigned long long)sector); 1858c3afd8f5SAndreas Gruenbacher } 1859668eebc6SAndreas Gruenbacher return NULL; 1860668eebc6SAndreas Gruenbacher } 1861668eebc6SAndreas Gruenbacher 1862bde89a9eSAndreas Gruenbacher static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi) 1863b411b363SPhilipp Reisner { 18649f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 1865b30ab791SAndreas Gruenbacher struct drbd_device *device; 1866b411b363SPhilipp Reisner struct drbd_request *req; 1867b411b363SPhilipp Reisner sector_t sector; 186882bc0194SAndreas Gruenbacher int err; 1869e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 18704a76b161SAndreas Gruenbacher 18719f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, 
pi->vnr); 18729f4fe9adSAndreas Gruenbacher if (!peer_device) 18734a76b161SAndreas Gruenbacher return -EIO; 18749f4fe9adSAndreas Gruenbacher device = peer_device->device; 1875b411b363SPhilipp Reisner 1876b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 1877b411b363SPhilipp Reisner 18780500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 1879b30ab791SAndreas Gruenbacher req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__); 18800500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1881c3afd8f5SAndreas Gruenbacher if (unlikely(!req)) 188282bc0194SAndreas Gruenbacher return -EIO; 1883b411b363SPhilipp Reisner 188424c4830cSBart Van Assche /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid 1885b411b363SPhilipp Reisner * special casing it there for the various failure cases. 1886b411b363SPhilipp Reisner * still no race with drbd_fail_pending_reads */ 188769a22773SAndreas Gruenbacher err = recv_dless_read(peer_device, req, sector, pi->size); 188882bc0194SAndreas Gruenbacher if (!err) 18898554df1cSAndreas Gruenbacher req_mod(req, DATA_RECEIVED); 1890b411b363SPhilipp Reisner /* else: nothing. handled from drbd_disconnect... 
1891b411b363SPhilipp Reisner * I don't think we may complete this just yet 1892b411b363SPhilipp Reisner * in case we are "on-disconnect: freeze" */ 1893b411b363SPhilipp Reisner 189482bc0194SAndreas Gruenbacher return err; 1895b411b363SPhilipp Reisner } 1896b411b363SPhilipp Reisner 1897bde89a9eSAndreas Gruenbacher static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi) 1898b411b363SPhilipp Reisner { 18999f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 1900b30ab791SAndreas Gruenbacher struct drbd_device *device; 1901b411b363SPhilipp Reisner sector_t sector; 190282bc0194SAndreas Gruenbacher int err; 1903e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 19044a76b161SAndreas Gruenbacher 19059f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 19069f4fe9adSAndreas Gruenbacher if (!peer_device) 19074a76b161SAndreas Gruenbacher return -EIO; 19089f4fe9adSAndreas Gruenbacher device = peer_device->device; 1909b411b363SPhilipp Reisner 1910b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 19110b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->block_id == ID_SYNCER); 1912b411b363SPhilipp Reisner 1913b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 1914b411b363SPhilipp Reisner /* data is submitted to disk within recv_resync_read. 1915b411b363SPhilipp Reisner * corresponding put_ldev done below on error, 1916fcefa62eSAndreas Gruenbacher * or in drbd_peer_request_endio. 
*/ 1917a0fb3c47SLars Ellenberg err = recv_resync_read(peer_device, sector, pi); 1918b411b363SPhilipp Reisner } else { 1919b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 1920d0180171SAndreas Gruenbacher drbd_err(device, "Can not write resync data to local disk.\n"); 1921b411b363SPhilipp Reisner 192269a22773SAndreas Gruenbacher err = drbd_drain_block(peer_device, pi->size); 1923b411b363SPhilipp Reisner 192469a22773SAndreas Gruenbacher drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 1925b411b363SPhilipp Reisner } 1926b411b363SPhilipp Reisner 1927b30ab791SAndreas Gruenbacher atomic_add(pi->size >> 9, &device->rs_sect_in); 1928778f271dSPhilipp Reisner 192982bc0194SAndreas Gruenbacher return err; 1930b411b363SPhilipp Reisner } 1931b411b363SPhilipp Reisner 1932b30ab791SAndreas Gruenbacher static void restart_conflicting_writes(struct drbd_device *device, 19337be8da07SAndreas Gruenbacher sector_t sector, int size) 1934b411b363SPhilipp Reisner { 19357be8da07SAndreas Gruenbacher struct drbd_interval *i; 19367be8da07SAndreas Gruenbacher struct drbd_request *req; 1937b411b363SPhilipp Reisner 1938b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 19397be8da07SAndreas Gruenbacher if (!i->local) 19407be8da07SAndreas Gruenbacher continue; 19417be8da07SAndreas Gruenbacher req = container_of(i, struct drbd_request, i); 19427be8da07SAndreas Gruenbacher if (req->rq_state & RQ_LOCAL_PENDING || 19437be8da07SAndreas Gruenbacher !(req->rq_state & RQ_POSTPONED)) 19447be8da07SAndreas Gruenbacher continue; 19452312f0b3SLars Ellenberg /* as it is RQ_POSTPONED, this will cause it to 19462312f0b3SLars Ellenberg * be queued on the retry workqueue. 
*/ 1947d4dabbe2SLars Ellenberg __req_mod(req, CONFLICT_RESOLVED, NULL); 19487be8da07SAndreas Gruenbacher } 19497be8da07SAndreas Gruenbacher } 19507be8da07SAndreas Gruenbacher 1951a990be46SAndreas Gruenbacher /* 1952668700b4SPhilipp Reisner * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs(). 1953b411b363SPhilipp Reisner */ 195499920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *w, int cancel) 1955b411b363SPhilipp Reisner { 19568050e6d0SAndreas Gruenbacher struct drbd_peer_request *peer_req = 1957a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 1958a8cd15baSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 1959a8cd15baSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1960db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 196199920dc5SAndreas Gruenbacher int err = 0, pcmd; 1962b411b363SPhilipp Reisner 1963303d1448SPhilipp Reisner if (peer_req->flags & EE_SEND_WRITE_ACK) { 1964db830c46SAndreas Gruenbacher if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { 1965b30ab791SAndreas Gruenbacher pcmd = (device->state.conn >= C_SYNC_SOURCE && 1966b30ab791SAndreas Gruenbacher device->state.conn <= C_PAUSED_SYNC_T && 1967db830c46SAndreas Gruenbacher peer_req->flags & EE_MAY_SET_IN_SYNC) ? 1968b411b363SPhilipp Reisner P_RS_WRITE_ACK : P_WRITE_ACK; 1969a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, pcmd, peer_req); 1970b411b363SPhilipp Reisner if (pcmd == P_RS_WRITE_ACK) 1971b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, sector, peer_req->i.size); 1972b411b363SPhilipp Reisner } else { 1973a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); 1974b411b363SPhilipp Reisner /* we expect it to be marked out of sync anyways... 1975b411b363SPhilipp Reisner * maybe assert this? 
*/ 1976b411b363SPhilipp Reisner } 1977b30ab791SAndreas Gruenbacher dec_unacked(device); 1978b411b363SPhilipp Reisner } 197908d0dabfSLars Ellenberg 1980b411b363SPhilipp Reisner /* we delete from the conflict detection hash _after_ we sent out the 1981b411b363SPhilipp Reisner * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ 1982302bdeaeSPhilipp Reisner if (peer_req->flags & EE_IN_INTERVAL_TREE) { 19830500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 19840b0ba1efSAndreas Gruenbacher D_ASSERT(device, !drbd_interval_empty(&peer_req->i)); 1985b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 19867be8da07SAndreas Gruenbacher if (peer_req->flags & EE_RESTART_REQUESTS) 1987b30ab791SAndreas Gruenbacher restart_conflicting_writes(device, sector, peer_req->i.size); 19880500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 1989bb3bfe96SAndreas Gruenbacher } else 19900b0ba1efSAndreas Gruenbacher D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 1991b411b363SPhilipp Reisner 19925dd2ca19SAndreas Gruenbacher drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); 1993b411b363SPhilipp Reisner 199499920dc5SAndreas Gruenbacher return err; 1995b411b363SPhilipp Reisner } 1996b411b363SPhilipp Reisner 1997a8cd15baSAndreas Gruenbacher static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) 1998b411b363SPhilipp Reisner { 19998050e6d0SAndreas Gruenbacher struct drbd_peer_request *peer_req = 2000a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 2001a8cd15baSAndreas Gruenbacher struct drbd_peer_device *peer_device = peer_req->peer_device; 200299920dc5SAndreas Gruenbacher int err; 2003b411b363SPhilipp Reisner 2004a8cd15baSAndreas Gruenbacher err = drbd_send_ack(peer_device, ack, peer_req); 2005a8cd15baSAndreas Gruenbacher dec_unacked(peer_device->device); 2006b411b363SPhilipp Reisner 200799920dc5SAndreas Gruenbacher return err; 2008b411b363SPhilipp Reisner } 2009b411b363SPhilipp Reisner 2010d4dabbe2SLars Ellenberg static int e_send_superseded(struct drbd_work *w, int unused) 2011b6a370baSPhilipp Reisner { 2012a8cd15baSAndreas Gruenbacher return e_send_ack(w, P_SUPERSEDED); 20137be8da07SAndreas Gruenbacher } 2014b6a370baSPhilipp Reisner 201599920dc5SAndreas Gruenbacher static int e_send_retry_write(struct drbd_work *w, int unused) 20167be8da07SAndreas Gruenbacher { 2017a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req = 2018a8cd15baSAndreas Gruenbacher container_of(w, struct drbd_peer_request, w); 2019a8cd15baSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection; 20207be8da07SAndreas Gruenbacher 2021a8cd15baSAndreas Gruenbacher return e_send_ack(w, connection->agreed_pro_version >= 100 ? 2022d4dabbe2SLars Ellenberg P_RETRY_WRITE : P_SUPERSEDED); 20237be8da07SAndreas Gruenbacher } 20247be8da07SAndreas Gruenbacher 20253e394da1SAndreas Gruenbacher static bool seq_greater(u32 a, u32 b) 20263e394da1SAndreas Gruenbacher { 20273e394da1SAndreas Gruenbacher /* 20283e394da1SAndreas Gruenbacher * We assume 32-bit wrap-around here. 
20293e394da1SAndreas Gruenbacher * For 24-bit wrap-around, we would have to shift: 20303e394da1SAndreas Gruenbacher * a <<= 8; b <<= 8; 20313e394da1SAndreas Gruenbacher */ 20323e394da1SAndreas Gruenbacher return (s32)a - (s32)b > 0; 20333e394da1SAndreas Gruenbacher } 20343e394da1SAndreas Gruenbacher 20353e394da1SAndreas Gruenbacher static u32 seq_max(u32 a, u32 b) 20363e394da1SAndreas Gruenbacher { 20373e394da1SAndreas Gruenbacher return seq_greater(a, b) ? a : b; 20383e394da1SAndreas Gruenbacher } 20393e394da1SAndreas Gruenbacher 204069a22773SAndreas Gruenbacher static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq) 20413e394da1SAndreas Gruenbacher { 204269a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 20433c13b680SLars Ellenberg unsigned int newest_peer_seq; 20443e394da1SAndreas Gruenbacher 204569a22773SAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) { 2046b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 2047b30ab791SAndreas Gruenbacher newest_peer_seq = seq_max(device->peer_seq, peer_seq); 2048b30ab791SAndreas Gruenbacher device->peer_seq = newest_peer_seq; 2049b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 2050b30ab791SAndreas Gruenbacher /* wake up only if we actually changed device->peer_seq */ 20513c13b680SLars Ellenberg if (peer_seq == newest_peer_seq) 2052b30ab791SAndreas Gruenbacher wake_up(&device->seq_wait); 20533e394da1SAndreas Gruenbacher } 20547be8da07SAndreas Gruenbacher } 20553e394da1SAndreas Gruenbacher 2056d93f6302SLars Ellenberg static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) 2057d93f6302SLars Ellenberg { 2058d93f6302SLars Ellenberg return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); 2059d93f6302SLars Ellenberg } 2060d93f6302SLars Ellenberg 2061d93f6302SLars Ellenberg /* maybe change sync_ee into interval trees as well? 
*/ 2062b30ab791SAndreas Gruenbacher static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) 2063d93f6302SLars Ellenberg { 2064d93f6302SLars Ellenberg struct drbd_peer_request *rs_req; 2065b6a370baSPhilipp Reisner bool rv = 0; 2066b6a370baSPhilipp Reisner 20670500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2068a8cd15baSAndreas Gruenbacher list_for_each_entry(rs_req, &device->sync_ee, w.list) { 2069d93f6302SLars Ellenberg if (overlaps(peer_req->i.sector, peer_req->i.size, 2070d93f6302SLars Ellenberg rs_req->i.sector, rs_req->i.size)) { 2071b6a370baSPhilipp Reisner rv = 1; 2072b6a370baSPhilipp Reisner break; 2073b6a370baSPhilipp Reisner } 2074b6a370baSPhilipp Reisner } 20750500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2076b6a370baSPhilipp Reisner 2077b6a370baSPhilipp Reisner return rv; 2078b6a370baSPhilipp Reisner } 2079b6a370baSPhilipp Reisner 2080b411b363SPhilipp Reisner /* Called from receive_Data. 2081b411b363SPhilipp Reisner * Synchronize packets on sock with packets on msock. 2082b411b363SPhilipp Reisner * 2083b411b363SPhilipp Reisner * This is here so even when a P_DATA packet traveling via sock overtook an Ack 2084b411b363SPhilipp Reisner * packet traveling on msock, they are still processed in the order they have 2085b411b363SPhilipp Reisner * been sent. 2086b411b363SPhilipp Reisner * 2087b411b363SPhilipp Reisner * Note: we don't care for Ack packets overtaking P_DATA packets. 2088b411b363SPhilipp Reisner * 2089b30ab791SAndreas Gruenbacher * In case packet_seq is larger than device->peer_seq number, there are 2090b411b363SPhilipp Reisner * outstanding packets on the msock. We wait for them to arrive. 2091b30ab791SAndreas Gruenbacher * In case we are the logically next packet, we update device->peer_seq 2092b411b363SPhilipp Reisner * ourselves. Correctly handles 32bit wrap around. 
2093b411b363SPhilipp Reisner * 2094b411b363SPhilipp Reisner * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, 2095b411b363SPhilipp Reisner * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds 2096b411b363SPhilipp Reisner * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have 2097b411b363SPhilipp Reisner * 1<<9 == 512 seconds aka ages for the 32bit wrap around... 2098b411b363SPhilipp Reisner * 2099b411b363SPhilipp Reisner * returns 0 if we may process the packet, 2100b411b363SPhilipp Reisner * -ERESTARTSYS if we were interrupted (by disconnect signal). */ 210169a22773SAndreas Gruenbacher static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq) 2102b411b363SPhilipp Reisner { 210369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 2104b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2105b411b363SPhilipp Reisner long timeout; 2106b874d231SPhilipp Reisner int ret = 0, tp; 21077be8da07SAndreas Gruenbacher 210869a22773SAndreas Gruenbacher if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) 21097be8da07SAndreas Gruenbacher return 0; 21107be8da07SAndreas Gruenbacher 2111b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 2112b411b363SPhilipp Reisner for (;;) { 2113b30ab791SAndreas Gruenbacher if (!seq_greater(peer_seq - 1, device->peer_seq)) { 2114b30ab791SAndreas Gruenbacher device->peer_seq = seq_max(device->peer_seq, peer_seq); 2115b411b363SPhilipp Reisner break; 21167be8da07SAndreas Gruenbacher } 2117b874d231SPhilipp Reisner 2118b411b363SPhilipp Reisner if (signal_pending(current)) { 2119b411b363SPhilipp Reisner ret = -ERESTARTSYS; 2120b411b363SPhilipp Reisner break; 2121b411b363SPhilipp Reisner } 2122b874d231SPhilipp Reisner 2123b874d231SPhilipp Reisner rcu_read_lock(); 21245dd2ca19SAndreas Gruenbacher tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; 2125b874d231SPhilipp Reisner 
rcu_read_unlock(); 2126b874d231SPhilipp Reisner 2127b874d231SPhilipp Reisner if (!tp) 2128b874d231SPhilipp Reisner break; 2129b874d231SPhilipp Reisner 2130b874d231SPhilipp Reisner /* Only need to wait if two_primaries is enabled */ 2131b30ab791SAndreas Gruenbacher prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE); 2132b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 213344ed167dSPhilipp Reisner rcu_read_lock(); 213469a22773SAndreas Gruenbacher timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10; 213544ed167dSPhilipp Reisner rcu_read_unlock(); 213671b1c1ebSAndreas Gruenbacher timeout = schedule_timeout(timeout); 2137b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 21387be8da07SAndreas Gruenbacher if (!timeout) { 2139b411b363SPhilipp Reisner ret = -ETIMEDOUT; 2140d0180171SAndreas Gruenbacher drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n"); 2141b411b363SPhilipp Reisner break; 2142b411b363SPhilipp Reisner } 2143b411b363SPhilipp Reisner } 2144b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 2145b30ab791SAndreas Gruenbacher finish_wait(&device->seq_wait, &wait); 2146b411b363SPhilipp Reisner return ret; 2147b411b363SPhilipp Reisner } 2148b411b363SPhilipp Reisner 2149688593c5SLars Ellenberg /* see also bio_flags_to_wire() 2150688593c5SLars Ellenberg * DRBD_REQ_*, because we need to semantically map the flags to data packet 2151688593c5SLars Ellenberg * flags and back. We may replicate to other kernel versions. */ 215281f0ffd2SAndreas Gruenbacher static unsigned long wire_flags_to_bio(u32 dpf) 215376d2e7ecSPhilipp Reisner { 215476d2e7ecSPhilipp Reisner return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 215576d2e7ecSPhilipp Reisner (dpf & DP_FUA ? REQ_FUA : 0) | 2156688593c5SLars Ellenberg (dpf & DP_FLUSH ? REQ_FLUSH : 0) | 215776d2e7ecSPhilipp Reisner (dpf & DP_DISCARD ? 
REQ_DISCARD : 0); 215876d2e7ecSPhilipp Reisner } 215976d2e7ecSPhilipp Reisner 2160b30ab791SAndreas Gruenbacher static void fail_postponed_requests(struct drbd_device *device, sector_t sector, 21617be8da07SAndreas Gruenbacher unsigned int size) 2162b411b363SPhilipp Reisner { 21637be8da07SAndreas Gruenbacher struct drbd_interval *i; 21647be8da07SAndreas Gruenbacher 21657be8da07SAndreas Gruenbacher repeat: 2166b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 21677be8da07SAndreas Gruenbacher struct drbd_request *req; 21687be8da07SAndreas Gruenbacher struct bio_and_error m; 21697be8da07SAndreas Gruenbacher 21707be8da07SAndreas Gruenbacher if (!i->local) 21717be8da07SAndreas Gruenbacher continue; 21727be8da07SAndreas Gruenbacher req = container_of(i, struct drbd_request, i); 21737be8da07SAndreas Gruenbacher if (!(req->rq_state & RQ_POSTPONED)) 21747be8da07SAndreas Gruenbacher continue; 21757be8da07SAndreas Gruenbacher req->rq_state &= ~RQ_POSTPONED; 21767be8da07SAndreas Gruenbacher __req_mod(req, NEG_ACKED, &m); 21770500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 21787be8da07SAndreas Gruenbacher if (m.bio) 2179b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 21800500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 21817be8da07SAndreas Gruenbacher goto repeat; 21827be8da07SAndreas Gruenbacher } 21837be8da07SAndreas Gruenbacher } 21847be8da07SAndreas Gruenbacher 2185b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device, 21867be8da07SAndreas Gruenbacher struct drbd_peer_request *peer_req) 21877be8da07SAndreas Gruenbacher { 2188e33b32deSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection; 2189bde89a9eSAndreas Gruenbacher bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags); 21907be8da07SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 21917be8da07SAndreas Gruenbacher 
const unsigned int size = peer_req->i.size; 21927be8da07SAndreas Gruenbacher struct drbd_interval *i; 21937be8da07SAndreas Gruenbacher bool equal; 21947be8da07SAndreas Gruenbacher int err; 21957be8da07SAndreas Gruenbacher 21967be8da07SAndreas Gruenbacher /* 21977be8da07SAndreas Gruenbacher * Inserting the peer request into the write_requests tree will prevent 21987be8da07SAndreas Gruenbacher * new conflicting local requests from being added. 21997be8da07SAndreas Gruenbacher */ 2200b30ab791SAndreas Gruenbacher drbd_insert_interval(&device->write_requests, &peer_req->i); 22017be8da07SAndreas Gruenbacher 22027be8da07SAndreas Gruenbacher repeat: 2203b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 22047be8da07SAndreas Gruenbacher if (i == &peer_req->i) 22057be8da07SAndreas Gruenbacher continue; 220608d0dabfSLars Ellenberg if (i->completed) 220708d0dabfSLars Ellenberg continue; 22087be8da07SAndreas Gruenbacher 22097be8da07SAndreas Gruenbacher if (!i->local) { 22107be8da07SAndreas Gruenbacher /* 22117be8da07SAndreas Gruenbacher * Our peer has sent a conflicting remote request; this 22127be8da07SAndreas Gruenbacher * should not happen in a two-node setup. Wait for the 22137be8da07SAndreas Gruenbacher * earlier peer request to complete. 
22147be8da07SAndreas Gruenbacher */ 2215b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, i); 22167be8da07SAndreas Gruenbacher if (err) 22177be8da07SAndreas Gruenbacher goto out; 22187be8da07SAndreas Gruenbacher goto repeat; 22197be8da07SAndreas Gruenbacher } 22207be8da07SAndreas Gruenbacher 22217be8da07SAndreas Gruenbacher equal = i->sector == sector && i->size == size; 22227be8da07SAndreas Gruenbacher if (resolve_conflicts) { 22237be8da07SAndreas Gruenbacher /* 22247be8da07SAndreas Gruenbacher * If the peer request is fully contained within the 2225d4dabbe2SLars Ellenberg * overlapping request, it can be considered overwritten 2226d4dabbe2SLars Ellenberg * and thus superseded; otherwise, it will be retried 2227d4dabbe2SLars Ellenberg * once all overlapping requests have completed. 22287be8da07SAndreas Gruenbacher */ 2229d4dabbe2SLars Ellenberg bool superseded = i->sector <= sector && i->sector + 22307be8da07SAndreas Gruenbacher (i->size >> 9) >= sector + (size >> 9); 22317be8da07SAndreas Gruenbacher 22327be8da07SAndreas Gruenbacher if (!equal) 2233d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 22347be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u, " 22357be8da07SAndreas Gruenbacher "assuming %s came first\n", 22367be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 22377be8da07SAndreas Gruenbacher (unsigned long long)sector, size, 2238d4dabbe2SLars Ellenberg superseded ? "local" : "remote"); 22397be8da07SAndreas Gruenbacher 2240a8cd15baSAndreas Gruenbacher peer_req->w.cb = superseded ? 
e_send_superseded : 22417be8da07SAndreas Gruenbacher e_send_retry_write; 2242a8cd15baSAndreas Gruenbacher list_add_tail(&peer_req->w.list, &device->done_ee); 2243668700b4SPhilipp Reisner queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work); 22447be8da07SAndreas Gruenbacher 22457be8da07SAndreas Gruenbacher err = -ENOENT; 22467be8da07SAndreas Gruenbacher goto out; 22477be8da07SAndreas Gruenbacher } else { 22487be8da07SAndreas Gruenbacher struct drbd_request *req = 22497be8da07SAndreas Gruenbacher container_of(i, struct drbd_request, i); 22507be8da07SAndreas Gruenbacher 22517be8da07SAndreas Gruenbacher if (!equal) 2252d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 22537be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u\n", 22547be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 22557be8da07SAndreas Gruenbacher (unsigned long long)sector, size); 22567be8da07SAndreas Gruenbacher 22577be8da07SAndreas Gruenbacher if (req->rq_state & RQ_LOCAL_PENDING || 22587be8da07SAndreas Gruenbacher !(req->rq_state & RQ_POSTPONED)) { 22597be8da07SAndreas Gruenbacher /* 22607be8da07SAndreas Gruenbacher * Wait for the node with the discard flag to 2261d4dabbe2SLars Ellenberg * decide if this request has been superseded 2262d4dabbe2SLars Ellenberg * or needs to be retried. 2263d4dabbe2SLars Ellenberg * Requests that have been superseded will 22647be8da07SAndreas Gruenbacher * disappear from the write_requests tree. 22657be8da07SAndreas Gruenbacher * 22667be8da07SAndreas Gruenbacher * In addition, wait for the conflicting 22677be8da07SAndreas Gruenbacher * request to finish locally before submitting 22687be8da07SAndreas Gruenbacher * the conflicting peer request. 
22697be8da07SAndreas Gruenbacher */ 2270b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, &req->i); 22717be8da07SAndreas Gruenbacher if (err) { 2272e33b32deSAndreas Gruenbacher _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 2273b30ab791SAndreas Gruenbacher fail_postponed_requests(device, sector, size); 22747be8da07SAndreas Gruenbacher goto out; 22757be8da07SAndreas Gruenbacher } 22767be8da07SAndreas Gruenbacher goto repeat; 22777be8da07SAndreas Gruenbacher } 22787be8da07SAndreas Gruenbacher /* 22797be8da07SAndreas Gruenbacher * Remember to restart the conflicting requests after 22807be8da07SAndreas Gruenbacher * the new peer request has completed. 22817be8da07SAndreas Gruenbacher */ 22827be8da07SAndreas Gruenbacher peer_req->flags |= EE_RESTART_REQUESTS; 22837be8da07SAndreas Gruenbacher } 22847be8da07SAndreas Gruenbacher } 22857be8da07SAndreas Gruenbacher err = 0; 22867be8da07SAndreas Gruenbacher 22877be8da07SAndreas Gruenbacher out: 22887be8da07SAndreas Gruenbacher if (err) 2289b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 22907be8da07SAndreas Gruenbacher return err; 22917be8da07SAndreas Gruenbacher } 22927be8da07SAndreas Gruenbacher 2293b411b363SPhilipp Reisner /* mirrored write */ 2294bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi) 2295b411b363SPhilipp Reisner { 22969f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2297b30ab791SAndreas Gruenbacher struct drbd_device *device; 229821ae5d7fSLars Ellenberg struct net_conf *nc; 2299b411b363SPhilipp Reisner sector_t sector; 2300db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 2301e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 23027be8da07SAndreas Gruenbacher u32 peer_seq = be32_to_cpu(p->seq_num); 2303b411b363SPhilipp Reisner int rw = WRITE; 2304b411b363SPhilipp Reisner u32 dp_flags; 2305302bdeaeSPhilipp Reisner int err, tp; 23067be8da07SAndreas 
Gruenbacher 23079f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 23089f4fe9adSAndreas Gruenbacher if (!peer_device) 23094a76b161SAndreas Gruenbacher return -EIO; 23109f4fe9adSAndreas Gruenbacher device = peer_device->device; 2311b411b363SPhilipp Reisner 2312b30ab791SAndreas Gruenbacher if (!get_ldev(device)) { 231382bc0194SAndreas Gruenbacher int err2; 2314b411b363SPhilipp Reisner 231569a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 231669a22773SAndreas Gruenbacher drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 2317bde89a9eSAndreas Gruenbacher atomic_inc(&connection->current_epoch->epoch_size); 231869a22773SAndreas Gruenbacher err2 = drbd_drain_block(peer_device, pi->size); 231982bc0194SAndreas Gruenbacher if (!err) 232082bc0194SAndreas Gruenbacher err = err2; 232182bc0194SAndreas Gruenbacher return err; 2322b411b363SPhilipp Reisner } 2323b411b363SPhilipp Reisner 2324fcefa62eSAndreas Gruenbacher /* 2325fcefa62eSAndreas Gruenbacher * Corresponding put_ldev done either below (on various errors), or in 2326fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio, if we successfully submit the data at the 2327fcefa62eSAndreas Gruenbacher * end of this function. 
2328fcefa62eSAndreas Gruenbacher */ 2329b411b363SPhilipp Reisner 2330b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 2331a0fb3c47SLars Ellenberg peer_req = read_in_block(peer_device, p->block_id, sector, pi); 2332db830c46SAndreas Gruenbacher if (!peer_req) { 2333b30ab791SAndreas Gruenbacher put_ldev(device); 233482bc0194SAndreas Gruenbacher return -EIO; 2335b411b363SPhilipp Reisner } 2336b411b363SPhilipp Reisner 2337a8cd15baSAndreas Gruenbacher peer_req->w.cb = e_end_block; 233821ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies; 233921ae5d7fSLars Ellenberg peer_req->flags |= EE_APPLICATION; 2340b411b363SPhilipp Reisner 2341688593c5SLars Ellenberg dp_flags = be32_to_cpu(p->dp_flags); 234281f0ffd2SAndreas Gruenbacher rw |= wire_flags_to_bio(dp_flags); 2343a0fb3c47SLars Ellenberg if (pi->cmd == P_TRIM) { 2344a0fb3c47SLars Ellenberg struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); 2345a0fb3c47SLars Ellenberg peer_req->flags |= EE_IS_TRIM; 2346a0fb3c47SLars Ellenberg if (!blk_queue_discard(q)) 2347a0fb3c47SLars Ellenberg peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; 2348a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->i.size > 0); 2349a0fb3c47SLars Ellenberg D_ASSERT(peer_device, rw & REQ_DISCARD); 2350a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->pages == NULL); 2351a0fb3c47SLars Ellenberg } else if (peer_req->pages == NULL) { 23520b0ba1efSAndreas Gruenbacher D_ASSERT(device, peer_req->i.size == 0); 23530b0ba1efSAndreas Gruenbacher D_ASSERT(device, dp_flags & DP_FLUSH); 2354a73ff323SLars Ellenberg } 2355688593c5SLars Ellenberg 2356688593c5SLars Ellenberg if (dp_flags & DP_MAY_SET_IN_SYNC) 2357db830c46SAndreas Gruenbacher peer_req->flags |= EE_MAY_SET_IN_SYNC; 2358688593c5SLars Ellenberg 2359bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock); 2360bde89a9eSAndreas Gruenbacher peer_req->epoch = connection->current_epoch; 2361db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->epoch_size); 
2362db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->active); 2363bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock); 2364b411b363SPhilipp Reisner 2365302bdeaeSPhilipp Reisner rcu_read_lock(); 236621ae5d7fSLars Ellenberg nc = rcu_dereference(peer_device->connection->net_conf); 236721ae5d7fSLars Ellenberg tp = nc->two_primaries; 236821ae5d7fSLars Ellenberg if (peer_device->connection->agreed_pro_version < 100) { 236921ae5d7fSLars Ellenberg switch (nc->wire_protocol) { 237021ae5d7fSLars Ellenberg case DRBD_PROT_C: 237121ae5d7fSLars Ellenberg dp_flags |= DP_SEND_WRITE_ACK; 237221ae5d7fSLars Ellenberg break; 237321ae5d7fSLars Ellenberg case DRBD_PROT_B: 237421ae5d7fSLars Ellenberg dp_flags |= DP_SEND_RECEIVE_ACK; 237521ae5d7fSLars Ellenberg break; 237621ae5d7fSLars Ellenberg } 237721ae5d7fSLars Ellenberg } 2378302bdeaeSPhilipp Reisner rcu_read_unlock(); 237921ae5d7fSLars Ellenberg 238021ae5d7fSLars Ellenberg if (dp_flags & DP_SEND_WRITE_ACK) { 238121ae5d7fSLars Ellenberg peer_req->flags |= EE_SEND_WRITE_ACK; 238221ae5d7fSLars Ellenberg inc_unacked(device); 238321ae5d7fSLars Ellenberg /* corresponding dec_unacked() in e_end_block() 238421ae5d7fSLars Ellenberg * respective _drbd_clear_done_ee */ 238521ae5d7fSLars Ellenberg } 238621ae5d7fSLars Ellenberg 238721ae5d7fSLars Ellenberg if (dp_flags & DP_SEND_RECEIVE_ACK) { 238821ae5d7fSLars Ellenberg /* I really don't like it that the receiver thread 238921ae5d7fSLars Ellenberg * sends on the msock, but anyways */ 23905dd2ca19SAndreas Gruenbacher drbd_send_ack(peer_device, P_RECV_ACK, peer_req); 239121ae5d7fSLars Ellenberg } 239221ae5d7fSLars Ellenberg 2393302bdeaeSPhilipp Reisner if (tp) { 239421ae5d7fSLars Ellenberg /* two primaries implies protocol C */ 239521ae5d7fSLars Ellenberg D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); 2396302bdeaeSPhilipp Reisner peer_req->flags |= EE_IN_INTERVAL_TREE; 239769a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 
23987be8da07SAndreas Gruenbacher if (err) 2399b411b363SPhilipp Reisner goto out_interrupted; 24000500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2401b30ab791SAndreas Gruenbacher err = handle_write_conflicts(device, peer_req); 24027be8da07SAndreas Gruenbacher if (err) { 24030500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 24047be8da07SAndreas Gruenbacher if (err == -ENOENT) { 2405b30ab791SAndreas Gruenbacher put_ldev(device); 240682bc0194SAndreas Gruenbacher return 0; 2407b411b363SPhilipp Reisner } 2408b411b363SPhilipp Reisner goto out_interrupted; 2409b411b363SPhilipp Reisner } 2410b874d231SPhilipp Reisner } else { 241169a22773SAndreas Gruenbacher update_peer_seq(peer_device, peer_seq); 24120500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2413b874d231SPhilipp Reisner } 2414a0fb3c47SLars Ellenberg /* if we use the zeroout fallback code, we process synchronously 2415a0fb3c47SLars Ellenberg * and we wait for all pending requests, respectively wait for 2416a0fb3c47SLars Ellenberg * active_ee to become empty in drbd_submit_peer_request(); 2417a0fb3c47SLars Ellenberg * better not add ourselves here. 
*/ 2418a0fb3c47SLars Ellenberg if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) 2419b9ed7080SLars Ellenberg list_add_tail(&peer_req->w.list, &device->active_ee); 24200500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2421b411b363SPhilipp Reisner 2422b30ab791SAndreas Gruenbacher if (device->state.conn == C_SYNC_TARGET) 2423b30ab791SAndreas Gruenbacher wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 2424b6a370baSPhilipp Reisner 2425b30ab791SAndreas Gruenbacher if (device->state.pdsk < D_INCONSISTENT) { 2426b411b363SPhilipp Reisner /* In case we have the only disk of the cluster, */ 2427b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 2428db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 24294dd726f0SLars Ellenberg drbd_al_begin_io(device, &peer_req->i); 243021ae5d7fSLars Ellenberg peer_req->flags |= EE_CALL_AL_COMPLETE_IO; 2431b411b363SPhilipp Reisner } 2432b411b363SPhilipp Reisner 2433b30ab791SAndreas Gruenbacher err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); 243482bc0194SAndreas Gruenbacher if (!err) 243582bc0194SAndreas Gruenbacher return 0; 2436b411b363SPhilipp Reisner 243710f6d992SLars Ellenberg /* don't care for the reason here */ 2438d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 24390500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2440a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 2441b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 24420500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 244321ae5d7fSLars Ellenberg if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { 244421ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 2445b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &peer_req->i); 244621ae5d7fSLars Ellenberg } 244722cc37a9SLars Ellenberg 2448b411b363SPhilipp 
Reisner out_interrupted: 2449bde89a9eSAndreas Gruenbacher drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); 2450b30ab791SAndreas Gruenbacher put_ldev(device); 2451b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 245282bc0194SAndreas Gruenbacher return err; 2453b411b363SPhilipp Reisner } 2454b411b363SPhilipp Reisner 24550f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy, 24560f0601f4SLars Ellenberg * and current sync rate is above c_min_rate. 24570f0601f4SLars Ellenberg * 24580f0601f4SLars Ellenberg * To decide whether or not the lower device is busy, we use a scheme similar 24590f0601f4SLars Ellenberg * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" 24600f0601f4SLars Ellenberg * (more than 64 sectors) of activity we cannot account for with our own resync 24610f0601f4SLars Ellenberg * activity, it obviously is "busy". 24620f0601f4SLars Ellenberg * 24630f0601f4SLars Ellenberg * The current sync rate used here uses only the most recent two step marks, 24640f0601f4SLars Ellenberg * to have a short time average so we can react faster. 
/*
 * NOTE(review): this region is a git-blame annotated dump (each code line is
 * prefixed with commit hash + author).  The code below is kept byte-identical;
 * only free-standing review comments were added.
 *
 * drbd_rs_should_slow_down(): decide whether resync reads for @sector should
 * be throttled.  Starts from the global c_min_rate verdict
 * (drbd_rs_c_min_rate_throttle()); then, unless @throttle_if_app_is_waiting,
 * the throttle is cancelled when application IO is already waiting on this
 * resync extent (BME_PRIORITY set on the lc entry, checked under al_lock),
 * because our resync progress is needed for that application IO to complete.
 */
 24650f0601f4SLars Ellenberg */ 2466ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, 2467ad3fee79SLars Ellenberg bool throttle_if_app_is_waiting) 2468e8299874SLars Ellenberg { 2469e8299874SLars Ellenberg struct lc_element *tmp; 2470ad3fee79SLars Ellenberg bool throttle = drbd_rs_c_min_rate_throttle(device); 2471e8299874SLars Ellenberg 2472ad3fee79SLars Ellenberg if (!throttle || throttle_if_app_is_waiting) 2473ad3fee79SLars Ellenberg return throttle; 2474e8299874SLars Ellenberg 2475e8299874SLars Ellenberg spin_lock_irq(&device->al_lock); 2476e8299874SLars Ellenberg tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); 2477e8299874SLars Ellenberg if (tmp) { 2478e8299874SLars Ellenberg struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 2479e8299874SLars Ellenberg if (test_bit(BME_PRIORITY, &bm_ext->flags)) 2480e8299874SLars Ellenberg throttle = false; 2481ad3fee79SLars Ellenberg /* Do not slow down if app IO is already waiting for this extent, 2482ad3fee79SLars Ellenberg * and our progress is necessary for application IO to complete.
/*
 * drbd_rs_c_min_rate_throttle(): returns true when resync should back off.
 * Conditions (all visible below): the feature is enabled (c_min_rate != 0,
 * read from disk_conf under RCU); the lower device shows activity we cannot
 * attribute to our own resync (application IO pending per ap_actlog_cnt, or
 * more than 64 sectors of unexplained IO since the last check, computed from
 * part_stat_read() minus rs_sect_ev); and the sync rate derived from the most
 * recent sync mark exceeds c_min_rate.
 */
 */ 2483e8299874SLars Ellenberg } 2484e8299874SLars Ellenberg spin_unlock_irq(&device->al_lock); 2485e8299874SLars Ellenberg 2486e8299874SLars Ellenberg return throttle; 2487e8299874SLars Ellenberg } 2488e8299874SLars Ellenberg 2489e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) 24900f0601f4SLars Ellenberg { 2491b30ab791SAndreas Gruenbacher struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; 24920f0601f4SLars Ellenberg unsigned long db, dt, dbdt; 2493daeda1ccSPhilipp Reisner unsigned int c_min_rate; 2494e8299874SLars Ellenberg int curr_events; 2495daeda1ccSPhilipp Reisner 2496daeda1ccSPhilipp Reisner rcu_read_lock(); 2497b30ab791SAndreas Gruenbacher c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; 2498daeda1ccSPhilipp Reisner rcu_read_unlock(); 24990f0601f4SLars Ellenberg 25000f0601f4SLars Ellenberg /* feature disabled? */ 2501daeda1ccSPhilipp Reisner if (c_min_rate == 0) 2502e8299874SLars Ellenberg return false; 2503e3555d85SPhilipp Reisner 25040f0601f4SLars Ellenberg curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + 25050f0601f4SLars Ellenberg (int)part_stat_read(&disk->part0, sectors[1]) - 2506b30ab791SAndreas Gruenbacher atomic_read(&device->rs_sect_ev); 2507ad3fee79SLars Ellenberg 2508ad3fee79SLars Ellenberg if (atomic_read(&device->ap_actlog_cnt) 2509ff8bd88bSLars Ellenberg || curr_events - device->rs_last_events > 64) { 25100f0601f4SLars Ellenberg unsigned long rs_left; 25110f0601f4SLars Ellenberg int i; 25120f0601f4SLars Ellenberg 2513b30ab791SAndreas Gruenbacher device->rs_last_events = curr_events; 25140f0601f4SLars Ellenberg 25150f0601f4SLars Ellenberg /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, 25160f0601f4SLars Ellenberg * approx.
/*
 * (continuation) rate computation: rs_left comes from ov_left while an
 * online verify runs (C_VERIFY_S/T), else from bitmap weight minus rs_failed;
 * dbdt is KB synced per second since mark [i], compared against c_min_rate.
 *
 * receive_DataRequest() starts below: handler for P_DATA_REQUEST,
 * P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST, P_OV_REQUEST and P_OV_REPLY.
 * Validation and submission continue in the following region.
 */
 */ 2517b30ab791SAndreas Gruenbacher i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; 25182649f080SLars Ellenberg 2519b30ab791SAndreas Gruenbacher if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 2520b30ab791SAndreas Gruenbacher rs_left = device->ov_left; 25212649f080SLars Ellenberg else 2522b30ab791SAndreas Gruenbacher rs_left = drbd_bm_total_weight(device) - device->rs_failed; 25230f0601f4SLars Ellenberg 2524b30ab791SAndreas Gruenbacher dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ; 25250f0601f4SLars Ellenberg if (!dt) 25260f0601f4SLars Ellenberg dt++; 2527b30ab791SAndreas Gruenbacher db = device->rs_mark_left[i] - rs_left; 25280f0601f4SLars Ellenberg dbdt = Bit2KB(db/dt); 25290f0601f4SLars Ellenberg 2530daeda1ccSPhilipp Reisner if (dbdt > c_min_rate) 2531e8299874SLars Ellenberg return true; 25320f0601f4SLars Ellenberg } 2533e8299874SLars Ellenberg return false; 25340f0601f4SLars Ellenberg } 25350f0601f4SLars Ellenberg 2536bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) 2537b411b363SPhilipp Reisner { 25389f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2539b30ab791SAndreas Gruenbacher struct drbd_device *device; 2540b411b363SPhilipp Reisner sector_t sector; 25414a76b161SAndreas Gruenbacher sector_t capacity; 2542db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 2543b411b363SPhilipp Reisner struct digest_info *di = NULL; 2544b18b37beSPhilipp Reisner int size, verb; 2545b411b363SPhilipp Reisner unsigned int fault_type; 2546e658983aSAndreas Gruenbacher struct p_block_req *p = pi->data; 25474a76b161SAndreas Gruenbacher 25489f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 25499f4fe9adSAndreas Gruenbacher if (!peer_device) 25504a76b161SAndreas Gruenbacher return -EIO; 25519f4fe9adSAndreas Gruenbacher device = peer_device->device; 2552b30ab791SAndreas Gruenbacher capacity =
/*
 * receive_DataRequest() continued (function header is in the previous chunk).
 * Flow visible below: validate the requested size (positive, 512-aligned,
 * <= DRBD_MAX_BIO_SIZE) and that the range fits the device capacity; if the
 * local disk is not D_UP_TO_DATE, send the matching negative ack for the
 * packet type and drain the payload; otherwise allocate a peer_request,
 * select the completion callback and fault-injection type per command,
 * receive the digest for P_CSUM_RS_REQUEST/P_OV_REPLY, queue on read_ee,
 * optionally throttle resync reads, then submit the READ.
 */
drbd_get_capacity(device->this_bdev); 2553b411b363SPhilipp Reisner 2554b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 2555b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 2556b411b363SPhilipp Reisner 2557c670a398SAndreas Gruenbacher if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { 2558d0180171SAndreas Gruenbacher drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2559b411b363SPhilipp Reisner (unsigned long long)sector, size); 256082bc0194SAndreas Gruenbacher return -EINVAL; 2561b411b363SPhilipp Reisner } 2562b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) { 2563d0180171SAndreas Gruenbacher drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2564b411b363SPhilipp Reisner (unsigned long long)sector, size); 256582bc0194SAndreas Gruenbacher return -EINVAL; 2566b411b363SPhilipp Reisner } 2567b411b363SPhilipp Reisner 2568b30ab791SAndreas Gruenbacher if (!get_ldev_if_state(device, D_UP_TO_DATE)) { 2569b18b37beSPhilipp Reisner verb = 1; 2570e2857216SAndreas Gruenbacher switch (pi->cmd) { 2571b18b37beSPhilipp Reisner case P_DATA_REQUEST: 257269a22773SAndreas Gruenbacher drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); 2573b18b37beSPhilipp Reisner break; 2574b18b37beSPhilipp Reisner case P_RS_DATA_REQUEST: 2575b18b37beSPhilipp Reisner case P_CSUM_RS_REQUEST: 2576b18b37beSPhilipp Reisner case P_OV_REQUEST: 257769a22773SAndreas Gruenbacher drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p); 2578b18b37beSPhilipp Reisner break; 2579b18b37beSPhilipp Reisner case P_OV_REPLY: 2580b18b37beSPhilipp Reisner verb = 0; 2581b30ab791SAndreas Gruenbacher dec_rs_pending(device); 258269a22773SAndreas Gruenbacher drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); 2583b18b37beSPhilipp Reisner break; 2584b18b37beSPhilipp Reisner default: 258549ba9b1bSAndreas Gruenbacher BUG(); 2586b18b37beSPhilipp Reisner } 2587b18b37beSPhilipp Reisner if (verb && __ratelimit(&drbd_ratelimit_state))
/* No usable local data: rate-limited complaint, then drain the payload. */
2588d0180171SAndreas Gruenbacher drbd_err(device, "Can not satisfy peer's read request, " 2589b411b363SPhilipp Reisner "no local data.\n"); 2590b18b37beSPhilipp Reisner 2591a821cc4aSLars Ellenberg /* drain possibly payload */ 259269a22773SAndreas Gruenbacher return drbd_drain_block(peer_device, pi->size); 2593b411b363SPhilipp Reisner } 2594b411b363SPhilipp Reisner 2595b411b363SPhilipp Reisner /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 2596b411b363SPhilipp Reisner * "criss-cross" setup, that might cause write-out on some other DRBD, 2597b411b363SPhilipp Reisner * which in turn might block on the other node at this very place. */ 2598a0fb3c47SLars Ellenberg peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, 2599a0fb3c47SLars Ellenberg true /* has real payload */, GFP_NOIO); 2600db830c46SAndreas Gruenbacher if (!peer_req) { 2601b30ab791SAndreas Gruenbacher put_ldev(device); 260282bc0194SAndreas Gruenbacher return -ENOMEM; 2603b411b363SPhilipp Reisner } 2604b411b363SPhilipp Reisner 2605e2857216SAndreas Gruenbacher switch (pi->cmd) { 2606b411b363SPhilipp Reisner case P_DATA_REQUEST: 2607a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_data_req; 2608b411b363SPhilipp Reisner fault_type = DRBD_FAULT_DT_RD; 260980a40e43SLars Ellenberg /* application IO, don't drbd_rs_begin_io */ 261021ae5d7fSLars Ellenberg peer_req->flags |= EE_APPLICATION; 261180a40e43SLars Ellenberg goto submit; 261280a40e43SLars Ellenberg 2613b411b363SPhilipp Reisner case P_RS_DATA_REQUEST: 2614a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_rsdata_req; 2615b411b363SPhilipp Reisner fault_type = DRBD_FAULT_RS_RD; 26165f9915bbSLars Ellenberg /* used in the sector offset progress display */ 2617b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 2618b411b363SPhilipp Reisner break; 2619b411b363SPhilipp Reisner 2620b411b363SPhilipp Reisner case P_OV_REPLY: 2621b411b363SPhilipp Reisner case P_CSUM_RS_REQUEST: 2622b411b363SPhilipp
/* Both digest-carrying commands: allocate digest_info and read pi->size
 * digest bytes from the connection before deciding the callback. */
Reisner fault_type = DRBD_FAULT_RS_RD; 2623e2857216SAndreas Gruenbacher di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); 2624b411b363SPhilipp Reisner if (!di) 2625b411b363SPhilipp Reisner goto out_free_e; 2626b411b363SPhilipp Reisner 2627e2857216SAndreas Gruenbacher di->digest_size = pi->size; 2628b411b363SPhilipp Reisner di->digest = (((char *)di)+sizeof(struct digest_info)); 2629b411b363SPhilipp Reisner 2630db830c46SAndreas Gruenbacher peer_req->digest = di; 2631db830c46SAndreas Gruenbacher peer_req->flags |= EE_HAS_DIGEST; 2632c36c3cedSLars Ellenberg 26339f4fe9adSAndreas Gruenbacher if (drbd_recv_all(peer_device->connection, di->digest, pi->size)) 2634b411b363SPhilipp Reisner goto out_free_e; 2635b411b363SPhilipp Reisner 2636e2857216SAndreas Gruenbacher if (pi->cmd == P_CSUM_RS_REQUEST) { 26379f4fe9adSAndreas Gruenbacher D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 2638a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_csum_rs_req; 26395f9915bbSLars Ellenberg /* used in the sector offset progress display */ 2640b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 2641aaaba345SLars Ellenberg /* remember to report stats in drbd_resync_finished */ 2642aaaba345SLars Ellenberg device->use_csums = true; 2643e2857216SAndreas Gruenbacher } else if (pi->cmd == P_OV_REPLY) { 26442649f080SLars Ellenberg /* track progress, we may need to throttle */ 2645b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_in); 2646a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_ov_reply; 2647b30ab791SAndreas Gruenbacher dec_rs_pending(device); 26480f0601f4SLars Ellenberg /* drbd_rs_begin_io done when we sent this request, 26490f0601f4SLars Ellenberg * but accounting still needs to be done.
/* P_OV_REQUEST: first request (ov_start_sector still ~0) initializes the
 * online-verify bookkeeping (start sector, ov_left, sync marks). */
*/ 26500f0601f4SLars Ellenberg goto submit_for_resync; 2651b411b363SPhilipp Reisner } 2652b411b363SPhilipp Reisner break; 2653b411b363SPhilipp Reisner 2654b411b363SPhilipp Reisner case P_OV_REQUEST: 2655b30ab791SAndreas Gruenbacher if (device->ov_start_sector == ~(sector_t)0 && 26569f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90) { 2657de228bbaSLars Ellenberg unsigned long now = jiffies; 2658de228bbaSLars Ellenberg int i; 2659b30ab791SAndreas Gruenbacher device->ov_start_sector = sector; 2660b30ab791SAndreas Gruenbacher device->ov_position = sector; 2661b30ab791SAndreas Gruenbacher device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector); 2662b30ab791SAndreas Gruenbacher device->rs_total = device->ov_left; 2663de228bbaSLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 2664b30ab791SAndreas Gruenbacher device->rs_mark_left[i] = device->ov_left; 2665b30ab791SAndreas Gruenbacher device->rs_mark_time[i] = now; 2666de228bbaSLars Ellenberg } 2667d0180171SAndreas Gruenbacher drbd_info(device, "Online Verify start sector: %llu\n", 2668b411b363SPhilipp Reisner (unsigned long long)sector); 2669b411b363SPhilipp Reisner } 2670a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_ov_req; 2671b411b363SPhilipp Reisner fault_type = DRBD_FAULT_RS_RD; 2672b411b363SPhilipp Reisner break; 2673b411b363SPhilipp Reisner 2674b411b363SPhilipp Reisner default: 267549ba9b1bSAndreas Gruenbacher BUG(); 2676b411b363SPhilipp Reisner } 2677b411b363SPhilipp Reisner 26780f0601f4SLars Ellenberg /* Throttle, drbd_rs_begin_io and submit should become asynchronous 26790f0601f4SLars Ellenberg * wrt the receiver, but it is not as straightforward as it may seem. 26800f0601f4SLars Ellenberg * Various places in the resync start and stop logic assume resync 26810f0601f4SLars Ellenberg * requests are processed in order, requeuing this on the worker thread 26820f0601f4SLars Ellenberg * introduces a bunch of new code for synchronization between threads.
26830f0601f4SLars Ellenberg * 26840f0601f4SLars Ellenberg * Unlimited throttling before drbd_rs_begin_io may stall the resync 26850f0601f4SLars Ellenberg * "forever", throttling after drbd_rs_begin_io will lock that extent 26860f0601f4SLars Ellenberg * for application writes for the same time. For now, just throttle 26870f0601f4SLars Ellenberg * here, where the rest of the code expects the receiver to sleep for 26880f0601f4SLars Ellenberg * a while, anyways. 26890f0601f4SLars Ellenberg */ 2690b411b363SPhilipp Reisner 26910f0601f4SLars Ellenberg /* Throttle before drbd_rs_begin_io, as that locks out application IO; 26920f0601f4SLars Ellenberg * this defers syncer requests for some time, before letting at least 26930f0601f4SLars Ellenberg * on request through. The resync controller on the receiving side 26940f0601f4SLars Ellenberg * will adapt to the incoming rate accordingly. 26950f0601f4SLars Ellenberg * 26960f0601f4SLars Ellenberg * We cannot throttle here if remote is Primary/SyncTarget: 26970f0601f4SLars Ellenberg * we would also throttle its application reads. 26980f0601f4SLars Ellenberg * In that case, throttling is done on the SyncTarget only. 26990f0601f4SLars Ellenberg */ 2700c5a2c150SLars Ellenberg 2701c5a2c150SLars Ellenberg /* Even though this may be a resync request, we do add to "read_ee"; 2702c5a2c150SLars Ellenberg * "sync_ee" is only used for resync WRITEs. 2703c5a2c150SLars Ellenberg * Add to list early, so debugfs can find this request 2704c5a2c150SLars Ellenberg * even if we have to sleep below.
/* Queue under req_lock, maybe sleep 100ms to throttle, reserve the resync
 * extent (drbd_rs_begin_io), account sectors, then submit the READ.
 * Error paths unwind list membership and the ldev reference. */
*/ 2705c5a2c150SLars Ellenberg spin_lock_irq(&device->resource->req_lock); 2706c5a2c150SLars Ellenberg list_add_tail(&peer_req->w.list, &device->read_ee); 2707c5a2c150SLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 2708c5a2c150SLars Ellenberg 2709944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_rs_should_slow_down); 2710ad3fee79SLars Ellenberg if (device->state.peer != R_PRIMARY 2711ad3fee79SLars Ellenberg && drbd_rs_should_slow_down(device, sector, false)) 2712e3555d85SPhilipp Reisner schedule_timeout_uninterruptible(HZ/10); 2713944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_rs_begin_io); 2714b30ab791SAndreas Gruenbacher if (drbd_rs_begin_io(device, sector)) 271580a40e43SLars Ellenberg goto out_free_e; 2716b411b363SPhilipp Reisner 27170f0601f4SLars Ellenberg submit_for_resync: 2718b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 27190f0601f4SLars Ellenberg 272080a40e43SLars Ellenberg submit: 2721944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_submit_peer_request); 2722b30ab791SAndreas Gruenbacher inc_unacked(device); 2723b30ab791SAndreas Gruenbacher if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) 272482bc0194SAndreas Gruenbacher return 0; 2725b411b363SPhilipp Reisner 272610f6d992SLars Ellenberg /* don't care for the reason here */ 2727d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 2728c5a2c150SLars Ellenberg 2729c5a2c150SLars Ellenberg out_free_e: 27300500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2731a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 27320500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 273322cc37a9SLars Ellenberg /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 273422cc37a9SLars Ellenberg 2735b30ab791SAndreas Gruenbacher put_ldev(device); 2736b30ab791SAndreas Gruenbacher
/*
 * End of receive_DataRequest()'s error path (free peer_req, return -EIO),
 * then drbd_asb_recover_0p(): after-split-brain auto-recovery when neither
 * node is Primary.  Returns > 0 when the local node wins, < 0 when the peer
 * wins, or -100 for "no automatic decision", per the configured
 * after-sb-0pri policy (read from net_conf under RCU).  'self'/'peer' are
 * the low bits of the local/peer bitmap UUIDs (presumably the
 * primary-at-crash indication consumed by the younger/older-primary rules —
 * TODO confirm against drbd_uuid_set_bm callers); ch_self/ch_peer count
 * changed blocks on each side for the least-changes policies.  The
 * fall-throughs between DISCARD_* cases are intentional (see the existing
 * "Else fall through" comments in the code).
 */
drbd_free_peer_req(device, peer_req); 273782bc0194SAndreas Gruenbacher return -EIO; 2738b411b363SPhilipp Reisner } 2739b411b363SPhilipp Reisner 274069a22773SAndreas Gruenbacher /** 274169a22773SAndreas Gruenbacher * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries 274269a22773SAndreas Gruenbacher */ 274369a22773SAndreas Gruenbacher static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local) 2744b411b363SPhilipp Reisner { 274569a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 2746b411b363SPhilipp Reisner int self, peer, rv = -100; 2747b411b363SPhilipp Reisner unsigned long ch_self, ch_peer; 274844ed167dSPhilipp Reisner enum drbd_after_sb_p after_sb_0p; 2749b411b363SPhilipp Reisner 2750b30ab791SAndreas Gruenbacher self = device->ldev->md.uuid[UI_BITMAP] & 1; 2751b30ab791SAndreas Gruenbacher peer = device->p_uuid[UI_BITMAP] & 1; 2752b411b363SPhilipp Reisner 2753b30ab791SAndreas Gruenbacher ch_peer = device->p_uuid[UI_SIZE]; 2754b30ab791SAndreas Gruenbacher ch_self = device->comm_bm_set; 2755b411b363SPhilipp Reisner 275644ed167dSPhilipp Reisner rcu_read_lock(); 275769a22773SAndreas Gruenbacher after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p; 275844ed167dSPhilipp Reisner rcu_read_unlock(); 275944ed167dSPhilipp Reisner switch (after_sb_0p) { 2760b411b363SPhilipp Reisner case ASB_CONSENSUS: 2761b411b363SPhilipp Reisner case ASB_DISCARD_SECONDARY: 2762b411b363SPhilipp Reisner case ASB_CALL_HELPER: 276344ed167dSPhilipp Reisner case ASB_VIOLENTLY: 2764d0180171SAndreas Gruenbacher drbd_err(device, "Configuration error.\n"); 2765b411b363SPhilipp Reisner break; 2766b411b363SPhilipp Reisner case ASB_DISCONNECT: 2767b411b363SPhilipp Reisner break; 2768b411b363SPhilipp Reisner case ASB_DISCARD_YOUNGER_PRI: 2769b411b363SPhilipp Reisner if (self == 0 && peer == 1) { 2770b411b363SPhilipp Reisner rv = -1; 2771b411b363SPhilipp Reisner break; 2772b411b363SPhilipp Reisner }
/* Ties between the younger/older-primary rules fall through to
 * discard-least-changes; RESOLVE_CONFLICTS breaks exact ties. */
2773b411b363SPhilipp Reisner if (self == 1 && peer == 0) { 2774b411b363SPhilipp Reisner rv = 1; 2775b411b363SPhilipp Reisner break; 2776b411b363SPhilipp Reisner } 2777b411b363SPhilipp Reisner /* Else fall through to one of the other strategies... */ 2778b411b363SPhilipp Reisner case ASB_DISCARD_OLDER_PRI: 2779b411b363SPhilipp Reisner if (self == 0 && peer == 1) { 2780b411b363SPhilipp Reisner rv = 1; 2781b411b363SPhilipp Reisner break; 2782b411b363SPhilipp Reisner } 2783b411b363SPhilipp Reisner if (self == 1 && peer == 0) { 2784b411b363SPhilipp Reisner rv = -1; 2785b411b363SPhilipp Reisner break; 2786b411b363SPhilipp Reisner } 2787b411b363SPhilipp Reisner /* Else fall through to one of the other strategies... */ 2788d0180171SAndreas Gruenbacher drbd_warn(device, "Discard younger/older primary did not find a decision\n" 2789b411b363SPhilipp Reisner "Using discard-least-changes instead\n"); 2790b411b363SPhilipp Reisner case ASB_DISCARD_ZERO_CHG: 2791b411b363SPhilipp Reisner if (ch_peer == 0 && ch_self == 0) { 279269a22773SAndreas Gruenbacher rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 2793b411b363SPhilipp Reisner ? -1 : 1; 2794b411b363SPhilipp Reisner break; 2795b411b363SPhilipp Reisner } else { 2796b411b363SPhilipp Reisner if (ch_peer == 0) { rv = 1; break; } 2797b411b363SPhilipp Reisner if (ch_self == 0) { rv = -1; break; } 2798b411b363SPhilipp Reisner } 279944ed167dSPhilipp Reisner if (after_sb_0p == ASB_DISCARD_ZERO_CHG) 2800b411b363SPhilipp Reisner break; 2801b411b363SPhilipp Reisner case ASB_DISCARD_LEAST_CHG: 2802b411b363SPhilipp Reisner if (ch_self < ch_peer) 2803b411b363SPhilipp Reisner rv = -1; 2804b411b363SPhilipp Reisner else if (ch_self > ch_peer) 2805b411b363SPhilipp Reisner rv = 1; 2806b411b363SPhilipp Reisner else /* ( ch_self == ch_peer ) */ 2807b411b363SPhilipp Reisner /* Well, then use something else.
/*
 * drbd_asb_recover_1p() begins below: after-split-brain recovery when
 * exactly one node is Primary.  Most 0p-only policies are rejected as
 * configuration errors; the remaining ones delegate to
 * drbd_asb_recover_0p() and filter its verdict by the current role.
 */
*/ 280869a22773SAndreas Gruenbacher rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) 2809b411b363SPhilipp Reisner ? -1 : 1; 2810b411b363SPhilipp Reisner break; 2811b411b363SPhilipp Reisner case ASB_DISCARD_LOCAL: 2812b411b363SPhilipp Reisner rv = -1; 2813b411b363SPhilipp Reisner break; 2814b411b363SPhilipp Reisner case ASB_DISCARD_REMOTE: 2815b411b363SPhilipp Reisner rv = 1; 2816b411b363SPhilipp Reisner } 2817b411b363SPhilipp Reisner 2818b411b363SPhilipp Reisner return rv; 2819b411b363SPhilipp Reisner } 2820b411b363SPhilipp Reisner 282169a22773SAndreas Gruenbacher /** 282269a22773SAndreas Gruenbacher * drbd_asb_recover_1p - Recover after split-brain with one remaining primary 282369a22773SAndreas Gruenbacher */ 282469a22773SAndreas Gruenbacher static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local) 2825b411b363SPhilipp Reisner { 282669a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 28276184ea21SAndreas Gruenbacher int hg, rv = -100; 282844ed167dSPhilipp Reisner enum drbd_after_sb_p after_sb_1p; 2829b411b363SPhilipp Reisner 283044ed167dSPhilipp Reisner rcu_read_lock(); 283169a22773SAndreas Gruenbacher after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p; 283244ed167dSPhilipp Reisner rcu_read_unlock(); 283344ed167dSPhilipp Reisner switch (after_sb_1p) { 2834b411b363SPhilipp Reisner case ASB_DISCARD_YOUNGER_PRI: 2835b411b363SPhilipp Reisner case ASB_DISCARD_OLDER_PRI: 2836b411b363SPhilipp Reisner case ASB_DISCARD_LEAST_CHG: 2837b411b363SPhilipp Reisner case ASB_DISCARD_LOCAL: 2838b411b363SPhilipp Reisner case ASB_DISCARD_REMOTE: 283944ed167dSPhilipp Reisner case ASB_DISCARD_ZERO_CHG: 2840d0180171SAndreas Gruenbacher drbd_err(device, "Configuration error.\n"); 2841b411b363SPhilipp Reisner break; 2842b411b363SPhilipp Reisner case ASB_DISCONNECT: 2843b411b363SPhilipp Reisner break; 2844b411b363SPhilipp Reisner case ASB_CONSENSUS: 284569a22773SAndreas Gruenbacher hg =
/*
 * drbd_asb_recover_1p() continued: ASB_CONSENSUS only accepts the 0p
 * verdict when it does not demote the current roles; ASB_DISCARD_SECONDARY
 * decides purely by role; ASB_CALL_HELPER demotes a losing Primary to
 * Secondary via drbd_change_state() and, if that fails, invokes the
 * "pri-lost-after-sb" user-space helper.
 */
drbd_asb_recover_0p(peer_device); 2846b30ab791SAndreas Gruenbacher if (hg == -1 && device->state.role == R_SECONDARY) 2847b411b363SPhilipp Reisner rv = hg; 2848b30ab791SAndreas Gruenbacher if (hg == 1 && device->state.role == R_PRIMARY) 2849b411b363SPhilipp Reisner rv = hg; 2850b411b363SPhilipp Reisner break; 2851b411b363SPhilipp Reisner case ASB_VIOLENTLY: 285269a22773SAndreas Gruenbacher rv = drbd_asb_recover_0p(peer_device); 2853b411b363SPhilipp Reisner break; 2854b411b363SPhilipp Reisner case ASB_DISCARD_SECONDARY: 2855b30ab791SAndreas Gruenbacher return device->state.role == R_PRIMARY ? 1 : -1; 2856b411b363SPhilipp Reisner case ASB_CALL_HELPER: 285769a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device); 2858b30ab791SAndreas Gruenbacher if (hg == -1 && device->state.role == R_PRIMARY) { 2859bb437946SAndreas Gruenbacher enum drbd_state_rv rv2; 2860bb437946SAndreas Gruenbacher 2861b411b363SPhilipp Reisner /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 2862b411b363SPhilipp Reisner * we might be here in C_WF_REPORT_PARAMS which is transient. 2863b411b363SPhilipp Reisner * we do not need to wait for the after state change work either.
/*
 * drbd_asb_recover_2p() begins at the end of this region: after-split-brain
 * recovery when both nodes are Primary.  Every role-dependent policy is a
 * configuration error here; only ASB_VIOLENTLY and ASB_CALL_HELPER can
 * produce an automatic decision (continued in the next region).
 */
*/ 2864b30ab791SAndreas Gruenbacher rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 2865bb437946SAndreas Gruenbacher if (rv2 != SS_SUCCESS) { 2866b30ab791SAndreas Gruenbacher drbd_khelper(device, "pri-lost-after-sb"); 2867b411b363SPhilipp Reisner } else { 2868d0180171SAndreas Gruenbacher drbd_warn(device, "Successfully gave up primary role.\n"); 2869b411b363SPhilipp Reisner rv = hg; 2870b411b363SPhilipp Reisner } 2871b411b363SPhilipp Reisner } else 2872b411b363SPhilipp Reisner rv = hg; 2873b411b363SPhilipp Reisner } 2874b411b363SPhilipp Reisner 2875b411b363SPhilipp Reisner return rv; 2876b411b363SPhilipp Reisner } 2877b411b363SPhilipp Reisner 287869a22773SAndreas Gruenbacher /** 287969a22773SAndreas Gruenbacher * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries 288069a22773SAndreas Gruenbacher */ 288169a22773SAndreas Gruenbacher static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local) 2882b411b363SPhilipp Reisner { 288369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 28846184ea21SAndreas Gruenbacher int hg, rv = -100; 288544ed167dSPhilipp Reisner enum drbd_after_sb_p after_sb_2p; 2886b411b363SPhilipp Reisner 288744ed167dSPhilipp Reisner rcu_read_lock(); 288869a22773SAndreas Gruenbacher after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p; 288944ed167dSPhilipp Reisner rcu_read_unlock(); 289044ed167dSPhilipp Reisner switch (after_sb_2p) { 2891b411b363SPhilipp Reisner case ASB_DISCARD_YOUNGER_PRI: 2892b411b363SPhilipp Reisner case ASB_DISCARD_OLDER_PRI: 2893b411b363SPhilipp Reisner case ASB_DISCARD_LEAST_CHG: 2894b411b363SPhilipp Reisner case ASB_DISCARD_LOCAL: 2895b411b363SPhilipp Reisner case ASB_DISCARD_REMOTE: 2896b411b363SPhilipp Reisner case ASB_CONSENSUS: 2897b411b363SPhilipp Reisner case ASB_DISCARD_SECONDARY: 289844ed167dSPhilipp Reisner case ASB_DISCARD_ZERO_CHG: 2899d0180171SAndreas Gruenbacher drbd_err(device, "Configuration
error.\n"); 2900b411b363SPhilipp Reisner break; 2901b411b363SPhilipp Reisner case ASB_VIOLENTLY: 290269a22773SAndreas Gruenbacher rv = drbd_asb_recover_0p(peer_device); 2903b411b363SPhilipp Reisner break; 2904b411b363SPhilipp Reisner case ASB_DISCONNECT: 2905b411b363SPhilipp Reisner break; 2906b411b363SPhilipp Reisner case ASB_CALL_HELPER: 290769a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device); 2908b411b363SPhilipp Reisner if (hg == -1) { 2909bb437946SAndreas Gruenbacher enum drbd_state_rv rv2; 2910bb437946SAndreas Gruenbacher 2911b411b363SPhilipp Reisner /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, 2912b411b363SPhilipp Reisner * we might be here in C_WF_REPORT_PARAMS which is transient. 2913b411b363SPhilipp Reisner * we do not need to wait for the after state change work either. */ 2914b30ab791SAndreas Gruenbacher rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY)); 2915bb437946SAndreas Gruenbacher if (rv2 != SS_SUCCESS) { 2916b30ab791SAndreas Gruenbacher drbd_khelper(device, "pri-lost-after-sb"); 2917b411b363SPhilipp Reisner } else { 2918d0180171SAndreas Gruenbacher drbd_warn(device, "Successfully gave up primary role.\n"); 2919b411b363SPhilipp Reisner rv = hg; 2920b411b363SPhilipp Reisner } 2921b411b363SPhilipp Reisner } else 2922b411b363SPhilipp Reisner rv = hg; 2923b411b363SPhilipp Reisner } 2924b411b363SPhilipp Reisner 2925b411b363SPhilipp Reisner return rv; 2926b411b363SPhilipp Reisner } 2927b411b363SPhilipp Reisner 2928b30ab791SAndreas Gruenbacher static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, 2929b411b363SPhilipp Reisner u64 bits, u64 flags) 2930b411b363SPhilipp Reisner { 2931b411b363SPhilipp Reisner if (!uuid) { 2932d0180171SAndreas Gruenbacher drbd_info(device, "%s uuid info vanished while I was looking!\n", text); 2933b411b363SPhilipp Reisner return; 2934b411b363SPhilipp Reisner } 2935d0180171SAndreas Gruenbacher drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX 
bits:%llu flags:%llX\n",
		  text,
		  (unsigned long long)uuid[UI_CURRENT],
		  (unsigned long long)uuid[UI_BITMAP],
		  (unsigned long long)uuid[UI_HISTORY_START],
		  (unsigned long long)uuid[UI_HISTORY_END],
		  (unsigned long long)bits,
		  (unsigned long long)flags);
}

/*
 * Return value convention (hg) of drbd_uuid_compare():
 *   100	after split brain try auto recover
 *     2	C_SYNC_SOURCE set BitMap
 *     1	C_SYNC_SOURCE use BitMap
 *     0	no Sync
 *    -1	C_SYNC_TARGET use BitMap
 *    -2	C_SYNC_TARGET set BitMap
 *  -100	after split brain, disconnect
 * -1000	unrelated data
 * -1091	requires proto 91
 * -1096	requires proto 96
 *
 * The matching rule is reported through *rule_nr for logging.
 */
static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	/* NOTE(review): connection is NULL when first_peer_device() finds no
	 * peer device, yet it is dereferenced below without a check --
	 * presumably callers only reach this with an established peer;
	 * confirm against the call sites. */
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* Compare current UUIDs with the "I was primary" low bit masked off. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Both sides fresh: nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Only we are fresh: full sync from the peer. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Only the peer is fresh: full sync to the peer. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			/* We were sync source and missed the "resync finished"
			 * event; detect that from the peer's history and fix
			 * up our own UUIDs to match. */
			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			/* Mirror image of rule 34/36: the peer was sync target
			 * and missed the finished event; correct its UUIDs. */
			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both crashed as primary: the tie-break flag decides. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* The peer's bitmap UUID matches our current: we are behind. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Our current UUID appears in the peer's history: we are far behind. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Our bitmap UUID matches the peer's current: the peer is behind. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* The peer's current UUID appears in our history: the peer is far behind. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Both bitmap UUIDs identical and non-zero: split brain, try recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Any common ancestor in the two histories: split brain, disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the UUID sets at all. */
	return -1000;
}

/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
3143b411b363SPhilipp Reisner */ 314469a22773SAndreas Gruenbacher static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, 314569a22773SAndreas Gruenbacher enum drbd_role peer_role, 3146b411b363SPhilipp Reisner enum drbd_disk_state peer_disk) __must_hold(local) 3147b411b363SPhilipp Reisner { 314869a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 3149b411b363SPhilipp Reisner enum drbd_conns rv = C_MASK; 3150b411b363SPhilipp Reisner enum drbd_disk_state mydisk; 315144ed167dSPhilipp Reisner struct net_conf *nc; 31526dff2902SAndreas Gruenbacher int hg, rule_nr, rr_conflict, tentative; 3153b411b363SPhilipp Reisner 3154b30ab791SAndreas Gruenbacher mydisk = device->state.disk; 3155b411b363SPhilipp Reisner if (mydisk == D_NEGOTIATING) 3156b30ab791SAndreas Gruenbacher mydisk = device->new_state_tmp.disk; 3157b411b363SPhilipp Reisner 3158d0180171SAndreas Gruenbacher drbd_info(device, "drbd_sync_handshake:\n"); 31599f2247bbSPhilipp Reisner 3160b30ab791SAndreas Gruenbacher spin_lock_irq(&device->ldev->md.uuid_lock); 3161b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0); 3162b30ab791SAndreas Gruenbacher drbd_uuid_dump(device, "peer", device->p_uuid, 3163b30ab791SAndreas Gruenbacher device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); 3164b411b363SPhilipp Reisner 3165b30ab791SAndreas Gruenbacher hg = drbd_uuid_compare(device, &rule_nr); 3166b30ab791SAndreas Gruenbacher spin_unlock_irq(&device->ldev->md.uuid_lock); 3167b411b363SPhilipp Reisner 3168d0180171SAndreas Gruenbacher drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); 3169b411b363SPhilipp Reisner 3170b411b363SPhilipp Reisner if (hg == -1000) { 3171d0180171SAndreas Gruenbacher drbd_alert(device, "Unrelated data, aborting!\n"); 3172b411b363SPhilipp Reisner return C_MASK; 3173b411b363SPhilipp Reisner } 31744a23f264SPhilipp Reisner if (hg < -1000) { 3175d0180171SAndreas Gruenbacher drbd_alert(device, "To 
resolve this both sides have to support at least protocol %d\n", -hg - 1000); 3176b411b363SPhilipp Reisner return C_MASK; 3177b411b363SPhilipp Reisner } 3178b411b363SPhilipp Reisner 3179b411b363SPhilipp Reisner if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) || 3180b411b363SPhilipp Reisner (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) { 3181b411b363SPhilipp Reisner int f = (hg == -100) || abs(hg) == 2; 3182b411b363SPhilipp Reisner hg = mydisk > D_INCONSISTENT ? 1 : -1; 3183b411b363SPhilipp Reisner if (f) 3184b411b363SPhilipp Reisner hg = hg*2; 3185d0180171SAndreas Gruenbacher drbd_info(device, "Becoming sync %s due to disk states.\n", 3186b411b363SPhilipp Reisner hg > 0 ? "source" : "target"); 3187b411b363SPhilipp Reisner } 3188b411b363SPhilipp Reisner 31893a11a487SAdam Gandelman if (abs(hg) == 100) 3190b30ab791SAndreas Gruenbacher drbd_khelper(device, "initial-split-brain"); 31913a11a487SAdam Gandelman 319244ed167dSPhilipp Reisner rcu_read_lock(); 319369a22773SAndreas Gruenbacher nc = rcu_dereference(peer_device->connection->net_conf); 319444ed167dSPhilipp Reisner 319544ed167dSPhilipp Reisner if (hg == 100 || (hg == -100 && nc->always_asbp)) { 3196b30ab791SAndreas Gruenbacher int pcount = (device->state.role == R_PRIMARY) 3197b411b363SPhilipp Reisner + (peer_role == R_PRIMARY); 3198b411b363SPhilipp Reisner int forced = (hg == -100); 3199b411b363SPhilipp Reisner 3200b411b363SPhilipp Reisner switch (pcount) { 3201b411b363SPhilipp Reisner case 0: 320269a22773SAndreas Gruenbacher hg = drbd_asb_recover_0p(peer_device); 3203b411b363SPhilipp Reisner break; 3204b411b363SPhilipp Reisner case 1: 320569a22773SAndreas Gruenbacher hg = drbd_asb_recover_1p(peer_device); 3206b411b363SPhilipp Reisner break; 3207b411b363SPhilipp Reisner case 2: 320869a22773SAndreas Gruenbacher hg = drbd_asb_recover_2p(peer_device); 3209b411b363SPhilipp Reisner break; 3210b411b363SPhilipp Reisner } 3211b411b363SPhilipp Reisner if (abs(hg) < 100) { 3212d0180171SAndreas 
Gruenbacher drbd_warn(device, "Split-Brain detected, %d primaries, " 3213b411b363SPhilipp Reisner "automatically solved. Sync from %s node\n", 3214b411b363SPhilipp Reisner pcount, (hg < 0) ? "peer" : "this"); 3215b411b363SPhilipp Reisner if (forced) { 3216d0180171SAndreas Gruenbacher drbd_warn(device, "Doing a full sync, since" 3217b411b363SPhilipp Reisner " UUIDs where ambiguous.\n"); 3218b411b363SPhilipp Reisner hg = hg*2; 3219b411b363SPhilipp Reisner } 3220b411b363SPhilipp Reisner } 3221b411b363SPhilipp Reisner } 3222b411b363SPhilipp Reisner 3223b411b363SPhilipp Reisner if (hg == -100) { 3224b30ab791SAndreas Gruenbacher if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1)) 3225b411b363SPhilipp Reisner hg = -1; 3226b30ab791SAndreas Gruenbacher if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1)) 3227b411b363SPhilipp Reisner hg = 1; 3228b411b363SPhilipp Reisner 3229b411b363SPhilipp Reisner if (abs(hg) < 100) 3230d0180171SAndreas Gruenbacher drbd_warn(device, "Split-Brain detected, manually solved. " 3231b411b363SPhilipp Reisner "Sync from %s node\n", 3232b411b363SPhilipp Reisner (hg < 0) ? "peer" : "this"); 3233b411b363SPhilipp Reisner } 323444ed167dSPhilipp Reisner rr_conflict = nc->rr_conflict; 32356dff2902SAndreas Gruenbacher tentative = nc->tentative; 323644ed167dSPhilipp Reisner rcu_read_unlock(); 3237b411b363SPhilipp Reisner 3238b411b363SPhilipp Reisner if (hg == -100) { 3239580b9767SLars Ellenberg /* FIXME this log message is not correct if we end up here 3240580b9767SLars Ellenberg * after an attempted attach on a diskless node. 3241580b9767SLars Ellenberg * We just refuse to attach -- well, we drop the "connection" 3242580b9767SLars Ellenberg * to that disk, in a way... 
*/ 3243d0180171SAndreas Gruenbacher drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n"); 3244b30ab791SAndreas Gruenbacher drbd_khelper(device, "split-brain"); 3245b411b363SPhilipp Reisner return C_MASK; 3246b411b363SPhilipp Reisner } 3247b411b363SPhilipp Reisner 3248b411b363SPhilipp Reisner if (hg > 0 && mydisk <= D_INCONSISTENT) { 3249d0180171SAndreas Gruenbacher drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n"); 3250b411b363SPhilipp Reisner return C_MASK; 3251b411b363SPhilipp Reisner } 3252b411b363SPhilipp Reisner 3253b411b363SPhilipp Reisner if (hg < 0 && /* by intention we do not use mydisk here. */ 3254b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) { 325544ed167dSPhilipp Reisner switch (rr_conflict) { 3256b411b363SPhilipp Reisner case ASB_CALL_HELPER: 3257b30ab791SAndreas Gruenbacher drbd_khelper(device, "pri-lost"); 3258b411b363SPhilipp Reisner /* fall through */ 3259b411b363SPhilipp Reisner case ASB_DISCONNECT: 3260d0180171SAndreas Gruenbacher drbd_err(device, "I shall become SyncTarget, but I am primary!\n"); 3261b411b363SPhilipp Reisner return C_MASK; 3262b411b363SPhilipp Reisner case ASB_VIOLENTLY: 3263d0180171SAndreas Gruenbacher drbd_warn(device, "Becoming SyncTarget, violating the stable-data" 3264b411b363SPhilipp Reisner "assumption\n"); 3265b411b363SPhilipp Reisner } 3266b411b363SPhilipp Reisner } 3267b411b363SPhilipp Reisner 326869a22773SAndreas Gruenbacher if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) { 3269cf14c2e9SPhilipp Reisner if (hg == 0) 3270d0180171SAndreas Gruenbacher drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n"); 3271cf14c2e9SPhilipp Reisner else 3272d0180171SAndreas Gruenbacher drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.", 3273cf14c2e9SPhilipp Reisner drbd_conn_str(hg > 0 ? 
C_SYNC_SOURCE : C_SYNC_TARGET), 3274cf14c2e9SPhilipp Reisner abs(hg) >= 2 ? "full" : "bit-map based"); 3275cf14c2e9SPhilipp Reisner return C_MASK; 3276cf14c2e9SPhilipp Reisner } 3277cf14c2e9SPhilipp Reisner 3278b411b363SPhilipp Reisner if (abs(hg) >= 2) { 3279d0180171SAndreas Gruenbacher drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 3280b30ab791SAndreas Gruenbacher if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", 328120ceb2b2SLars Ellenberg BM_LOCKED_SET_ALLOWED)) 3282b411b363SPhilipp Reisner return C_MASK; 3283b411b363SPhilipp Reisner } 3284b411b363SPhilipp Reisner 3285b411b363SPhilipp Reisner if (hg > 0) { /* become sync source. */ 3286b411b363SPhilipp Reisner rv = C_WF_BITMAP_S; 3287b411b363SPhilipp Reisner } else if (hg < 0) { /* become sync target */ 3288b411b363SPhilipp Reisner rv = C_WF_BITMAP_T; 3289b411b363SPhilipp Reisner } else { 3290b411b363SPhilipp Reisner rv = C_CONNECTED; 3291b30ab791SAndreas Gruenbacher if (drbd_bm_total_weight(device)) { 3292d0180171SAndreas Gruenbacher drbd_info(device, "No resync, but %lu bits in bitmap!\n", 3293b30ab791SAndreas Gruenbacher drbd_bm_total_weight(device)); 3294b411b363SPhilipp Reisner } 3295b411b363SPhilipp Reisner } 3296b411b363SPhilipp Reisner 3297b411b363SPhilipp Reisner return rv; 3298b411b363SPhilipp Reisner } 3299b411b363SPhilipp Reisner 3300f179d76dSPhilipp Reisner static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) 3301b411b363SPhilipp Reisner { 3302b411b363SPhilipp Reisner /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ 3303f179d76dSPhilipp Reisner if (peer == ASB_DISCARD_REMOTE) 3304f179d76dSPhilipp Reisner return ASB_DISCARD_LOCAL; 3305b411b363SPhilipp Reisner 3306b411b363SPhilipp Reisner /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ 3307f179d76dSPhilipp Reisner if (peer == ASB_DISCARD_LOCAL) 3308f179d76dSPhilipp Reisner return ASB_DISCARD_REMOTE; 
3309b411b363SPhilipp Reisner 3310b411b363SPhilipp Reisner /* everything else is valid if they are equal on both sides. */ 3311f179d76dSPhilipp Reisner return peer; 3312b411b363SPhilipp Reisner } 3313b411b363SPhilipp Reisner 3314bde89a9eSAndreas Gruenbacher static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi) 3315b411b363SPhilipp Reisner { 3316e658983aSAndreas Gruenbacher struct p_protocol *p = pi->data; 3317036b17eaSPhilipp Reisner enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 3318036b17eaSPhilipp Reisner int p_proto, p_discard_my_data, p_two_primaries, cf; 3319036b17eaSPhilipp Reisner struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; 3320036b17eaSPhilipp Reisner char integrity_alg[SHARED_SECRET_MAX] = ""; 3321accdbcc5SAndreas Gruenbacher struct crypto_hash *peer_integrity_tfm = NULL; 33227aca6c75SPhilipp Reisner void *int_dig_in = NULL, *int_dig_vv = NULL; 3323b411b363SPhilipp Reisner 3324b411b363SPhilipp Reisner p_proto = be32_to_cpu(p->protocol); 3325b411b363SPhilipp Reisner p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 3326b411b363SPhilipp Reisner p_after_sb_1p = be32_to_cpu(p->after_sb_1p); 3327b411b363SPhilipp Reisner p_after_sb_2p = be32_to_cpu(p->after_sb_2p); 3328b411b363SPhilipp Reisner p_two_primaries = be32_to_cpu(p->two_primaries); 3329cf14c2e9SPhilipp Reisner cf = be32_to_cpu(p->conn_flags); 33306139f60dSAndreas Gruenbacher p_discard_my_data = cf & CF_DISCARD_MY_DATA; 3331cf14c2e9SPhilipp Reisner 3332bde89a9eSAndreas Gruenbacher if (connection->agreed_pro_version >= 87) { 333386db0618SAndreas Gruenbacher int err; 333486db0618SAndreas Gruenbacher 333588104ca4SAndreas Gruenbacher if (pi->size > sizeof(integrity_alg)) 333686db0618SAndreas Gruenbacher return -EIO; 3337bde89a9eSAndreas Gruenbacher err = drbd_recv_all(connection, integrity_alg, pi->size); 333886db0618SAndreas Gruenbacher if (err) 333986db0618SAndreas Gruenbacher return err; 334088104ca4SAndreas Gruenbacher 
integrity_alg[SHARED_SECRET_MAX - 1] = 0; 3341036b17eaSPhilipp Reisner } 334286db0618SAndreas Gruenbacher 33437d4c782cSAndreas Gruenbacher if (pi->cmd != P_PROTOCOL_UPDATE) { 3344bde89a9eSAndreas Gruenbacher clear_bit(CONN_DRY_RUN, &connection->flags); 3345cf14c2e9SPhilipp Reisner 3346cf14c2e9SPhilipp Reisner if (cf & CF_DRY_RUN) 3347bde89a9eSAndreas Gruenbacher set_bit(CONN_DRY_RUN, &connection->flags); 3348b411b363SPhilipp Reisner 334944ed167dSPhilipp Reisner rcu_read_lock(); 3350bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 335144ed167dSPhilipp Reisner 3352036b17eaSPhilipp Reisner if (p_proto != nc->wire_protocol) { 33531ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "protocol"); 335444ed167dSPhilipp Reisner goto disconnect_rcu_unlock; 3355b411b363SPhilipp Reisner } 3356b411b363SPhilipp Reisner 3357f179d76dSPhilipp Reisner if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { 33581ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri"); 335944ed167dSPhilipp Reisner goto disconnect_rcu_unlock; 3360b411b363SPhilipp Reisner } 3361b411b363SPhilipp Reisner 3362f179d76dSPhilipp Reisner if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { 33631ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri"); 336444ed167dSPhilipp Reisner goto disconnect_rcu_unlock; 3365b411b363SPhilipp Reisner } 3366b411b363SPhilipp Reisner 3367f179d76dSPhilipp Reisner if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { 33681ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri"); 336944ed167dSPhilipp Reisner goto disconnect_rcu_unlock; 3370b411b363SPhilipp Reisner } 3371b411b363SPhilipp Reisner 33726139f60dSAndreas Gruenbacher if (p_discard_my_data && nc->discard_my_data) { 33731ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "discard-my-data"); 337444ed167dSPhilipp Reisner 
goto disconnect_rcu_unlock; 3375b411b363SPhilipp Reisner } 3376b411b363SPhilipp Reisner 337744ed167dSPhilipp Reisner if (p_two_primaries != nc->two_primaries) { 33781ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries"); 337944ed167dSPhilipp Reisner goto disconnect_rcu_unlock; 3380b411b363SPhilipp Reisner } 3381b411b363SPhilipp Reisner 3382036b17eaSPhilipp Reisner if (strcmp(integrity_alg, nc->integrity_alg)) { 33831ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg"); 3384036b17eaSPhilipp Reisner goto disconnect_rcu_unlock; 3385036b17eaSPhilipp Reisner } 3386036b17eaSPhilipp Reisner 338786db0618SAndreas Gruenbacher rcu_read_unlock(); 3388fbc12f45SAndreas Gruenbacher } 33897d4c782cSAndreas Gruenbacher 33907d4c782cSAndreas Gruenbacher if (integrity_alg[0]) { 33917d4c782cSAndreas Gruenbacher int hash_size; 33927d4c782cSAndreas Gruenbacher 33937d4c782cSAndreas Gruenbacher /* 33947d4c782cSAndreas Gruenbacher * We can only change the peer data integrity algorithm 33957d4c782cSAndreas Gruenbacher * here. Changing our own data integrity algorithm 33967d4c782cSAndreas Gruenbacher * requires that we send a P_PROTOCOL_UPDATE packet at 33977d4c782cSAndreas Gruenbacher * the same time; otherwise, the peer has no way to 33987d4c782cSAndreas Gruenbacher * tell between which packets the algorithm should 33997d4c782cSAndreas Gruenbacher * change. 
34007d4c782cSAndreas Gruenbacher */ 34017d4c782cSAndreas Gruenbacher 34027d4c782cSAndreas Gruenbacher peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); 34037d4c782cSAndreas Gruenbacher if (!peer_integrity_tfm) { 34041ec861ebSAndreas Gruenbacher drbd_err(connection, "peer data-integrity-alg %s not supported\n", 34057d4c782cSAndreas Gruenbacher integrity_alg); 3406b411b363SPhilipp Reisner goto disconnect; 3407b411b363SPhilipp Reisner } 3408b411b363SPhilipp Reisner 34097d4c782cSAndreas Gruenbacher hash_size = crypto_hash_digestsize(peer_integrity_tfm); 34107d4c782cSAndreas Gruenbacher int_dig_in = kmalloc(hash_size, GFP_KERNEL); 34117d4c782cSAndreas Gruenbacher int_dig_vv = kmalloc(hash_size, GFP_KERNEL); 34127d4c782cSAndreas Gruenbacher if (!(int_dig_in && int_dig_vv)) { 34131ec861ebSAndreas Gruenbacher drbd_err(connection, "Allocation of buffers for data integrity checking failed\n"); 34147d4c782cSAndreas Gruenbacher goto disconnect; 34157d4c782cSAndreas Gruenbacher } 34167d4c782cSAndreas Gruenbacher } 34177d4c782cSAndreas Gruenbacher 34187d4c782cSAndreas Gruenbacher new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); 34197d4c782cSAndreas Gruenbacher if (!new_net_conf) { 34201ec861ebSAndreas Gruenbacher drbd_err(connection, "Allocation of new net_conf failed\n"); 3421b411b363SPhilipp Reisner goto disconnect; 3422b411b363SPhilipp Reisner } 3423b411b363SPhilipp Reisner 3424bde89a9eSAndreas Gruenbacher mutex_lock(&connection->data.mutex); 34250500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 3426bde89a9eSAndreas Gruenbacher old_net_conf = connection->net_conf; 34277d4c782cSAndreas Gruenbacher *new_net_conf = *old_net_conf; 3428b411b363SPhilipp Reisner 34297d4c782cSAndreas Gruenbacher new_net_conf->wire_protocol = p_proto; 34307d4c782cSAndreas Gruenbacher new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); 34317d4c782cSAndreas Gruenbacher new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); 
34327d4c782cSAndreas Gruenbacher new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); 34337d4c782cSAndreas Gruenbacher new_net_conf->two_primaries = p_two_primaries; 3434b411b363SPhilipp Reisner 3435bde89a9eSAndreas Gruenbacher rcu_assign_pointer(connection->net_conf, new_net_conf); 34360500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3437bde89a9eSAndreas Gruenbacher mutex_unlock(&connection->data.mutex); 3438b411b363SPhilipp Reisner 3439bde89a9eSAndreas Gruenbacher crypto_free_hash(connection->peer_integrity_tfm); 3440bde89a9eSAndreas Gruenbacher kfree(connection->int_dig_in); 3441bde89a9eSAndreas Gruenbacher kfree(connection->int_dig_vv); 3442bde89a9eSAndreas Gruenbacher connection->peer_integrity_tfm = peer_integrity_tfm; 3443bde89a9eSAndreas Gruenbacher connection->int_dig_in = int_dig_in; 3444bde89a9eSAndreas Gruenbacher connection->int_dig_vv = int_dig_vv; 3445b411b363SPhilipp Reisner 34467d4c782cSAndreas Gruenbacher if (strcmp(old_net_conf->integrity_alg, integrity_alg)) 34471ec861ebSAndreas Gruenbacher drbd_info(connection, "peer data-integrity-alg: %s\n", 34487d4c782cSAndreas Gruenbacher integrity_alg[0] ? 
integrity_alg : "(none)"); 3449b411b363SPhilipp Reisner 34507d4c782cSAndreas Gruenbacher synchronize_rcu(); 34517d4c782cSAndreas Gruenbacher kfree(old_net_conf); 345282bc0194SAndreas Gruenbacher return 0; 3453b411b363SPhilipp Reisner 345444ed167dSPhilipp Reisner disconnect_rcu_unlock: 345544ed167dSPhilipp Reisner rcu_read_unlock(); 3456b411b363SPhilipp Reisner disconnect: 3457b792c35cSAndreas Gruenbacher crypto_free_hash(peer_integrity_tfm); 3458036b17eaSPhilipp Reisner kfree(int_dig_in); 3459036b17eaSPhilipp Reisner kfree(int_dig_vv); 3460bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 346182bc0194SAndreas Gruenbacher return -EIO; 3462b411b363SPhilipp Reisner } 3463b411b363SPhilipp Reisner 3464b411b363SPhilipp Reisner /* helper function 3465b411b363SPhilipp Reisner * input: alg name, feature name 3466b411b363SPhilipp Reisner * return: NULL (alg name was "") 3467b411b363SPhilipp Reisner * ERR_PTR(error) if something goes wrong 3468b411b363SPhilipp Reisner * or the crypto hash ptr, if it worked out ok. 
 */
static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	/* Empty algorithm name means "feature disabled". */
	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		/* Log here for diagnostics; the ERR_PTR is still returned,
		 * so the caller must check IS_ERR() as well. */
		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
			alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}

/* Drain and discard the payload of the current packet.
 * Returns 0 when all pi->size bytes were consumed, a negative value on a
 * receive error, or -EIO if the stream ended early. */
static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		/* Read in receive-buffer-sized chunks; the data is thrown away. */
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;	/* connection closed before the packet ended */
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}

/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Keep the stream in sync by consuming the packet's payload. */
	return ignore_remaining_packet(connection, pi);
}

/* Handle a P_SYNC_PARAM* packet: resync rate / verify / csums settings. */
static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3534bde89a9eSAndreas Gruenbacher const int apv = connection->agreed_pro_version; 3535813472ceSPhilipp Reisner struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 3536778f271dSPhilipp Reisner int fifo_size = 0; 353782bc0194SAndreas Gruenbacher int err; 3538b411b363SPhilipp Reisner 35399f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 35409f4fe9adSAndreas Gruenbacher if (!peer_device) 3541bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 35429f4fe9adSAndreas Gruenbacher device = peer_device->device; 3543b411b363SPhilipp Reisner 3544b411b363SPhilipp Reisner exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 3545b411b363SPhilipp Reisner : apv == 88 ? sizeof(struct p_rs_param) 3546b411b363SPhilipp Reisner + SHARED_SECRET_MAX 35478e26f9ccSPhilipp Reisner : apv <= 94 ? sizeof(struct p_rs_param_89) 35488e26f9ccSPhilipp Reisner : /* apv >= 95 */ sizeof(struct p_rs_param_95); 3549b411b363SPhilipp Reisner 3550e2857216SAndreas Gruenbacher if (pi->size > exp_max_sz) { 3551d0180171SAndreas Gruenbacher drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n", 3552e2857216SAndreas Gruenbacher pi->size, exp_max_sz); 355382bc0194SAndreas Gruenbacher return -EIO; 3554b411b363SPhilipp Reisner } 3555b411b363SPhilipp Reisner 3556b411b363SPhilipp Reisner if (apv <= 88) { 3557e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param); 3558e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 35598e26f9ccSPhilipp Reisner } else if (apv <= 94) { 3560e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_89); 3561e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 35620b0ba1efSAndreas Gruenbacher D_ASSERT(device, data_size == 0); 35638e26f9ccSPhilipp Reisner } else { 3564e658983aSAndreas Gruenbacher header_size = sizeof(struct p_rs_param_95); 3565e2857216SAndreas Gruenbacher data_size = pi->size - header_size; 35660b0ba1efSAndreas Gruenbacher D_ASSERT(device, 
data_size == 0); 3567b411b363SPhilipp Reisner } 3568b411b363SPhilipp Reisner 3569b411b363SPhilipp Reisner /* initialize verify_alg and csums_alg */ 3570e658983aSAndreas Gruenbacher p = pi->data; 3571b411b363SPhilipp Reisner memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 3572b411b363SPhilipp Reisner 35739f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, header_size); 357482bc0194SAndreas Gruenbacher if (err) 357582bc0194SAndreas Gruenbacher return err; 3576b411b363SPhilipp Reisner 35770500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 35789f4fe9adSAndreas Gruenbacher old_net_conf = peer_device->connection->net_conf; 3579b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3580daeda1ccSPhilipp Reisner new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 3581daeda1ccSPhilipp Reisner if (!new_disk_conf) { 3582b30ab791SAndreas Gruenbacher put_ldev(device); 35830500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3584d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new disk_conf failed\n"); 3585daeda1ccSPhilipp Reisner return -ENOMEM; 3586f399002eSLars Ellenberg } 3587b411b363SPhilipp Reisner 3588b30ab791SAndreas Gruenbacher old_disk_conf = device->ldev->disk_conf; 3589daeda1ccSPhilipp Reisner *new_disk_conf = *old_disk_conf; 3590daeda1ccSPhilipp Reisner 35916394b935SAndreas Gruenbacher new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); 3592813472ceSPhilipp Reisner } 3593b411b363SPhilipp Reisner 3594b411b363SPhilipp Reisner if (apv >= 88) { 3595b411b363SPhilipp Reisner if (apv == 88) { 35965de73827SPhilipp Reisner if (data_size > SHARED_SECRET_MAX || data_size == 0) { 3597d0180171SAndreas Gruenbacher drbd_err(device, "verify-alg of wrong size, " 35985de73827SPhilipp Reisner "peer wants %u, accepting only up to %u byte\n", 3599b411b363SPhilipp Reisner data_size, SHARED_SECRET_MAX); 3600813472ceSPhilipp Reisner err = -EIO; 3601813472ceSPhilipp Reisner goto 
reconnect; 3602b411b363SPhilipp Reisner } 3603b411b363SPhilipp Reisner 36049f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size); 3605813472ceSPhilipp Reisner if (err) 3606813472ceSPhilipp Reisner goto reconnect; 3607b411b363SPhilipp Reisner /* we expect NUL terminated string */ 3608b411b363SPhilipp Reisner /* but just in case someone tries to be evil */ 36090b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[data_size-1] == 0); 3610b411b363SPhilipp Reisner p->verify_alg[data_size-1] = 0; 3611b411b363SPhilipp Reisner 3612b411b363SPhilipp Reisner } else /* apv >= 89 */ { 3613b411b363SPhilipp Reisner /* we still expect NUL terminated strings */ 3614b411b363SPhilipp Reisner /* but just in case someone tries to be evil */ 36150b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0); 36160b0ba1efSAndreas Gruenbacher D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0); 3617b411b363SPhilipp Reisner p->verify_alg[SHARED_SECRET_MAX-1] = 0; 3618b411b363SPhilipp Reisner p->csums_alg[SHARED_SECRET_MAX-1] = 0; 3619b411b363SPhilipp Reisner } 3620b411b363SPhilipp Reisner 36212ec91e0eSPhilipp Reisner if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { 3622b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) { 3623d0180171SAndreas Gruenbacher drbd_err(device, "Different verify-alg settings. 
me=\"%s\" peer=\"%s\"\n", 36242ec91e0eSPhilipp Reisner old_net_conf->verify_alg, p->verify_alg); 3625b411b363SPhilipp Reisner goto disconnect; 3626b411b363SPhilipp Reisner } 3627b30ab791SAndreas Gruenbacher verify_tfm = drbd_crypto_alloc_digest_safe(device, 3628b411b363SPhilipp Reisner p->verify_alg, "verify-alg"); 3629b411b363SPhilipp Reisner if (IS_ERR(verify_tfm)) { 3630b411b363SPhilipp Reisner verify_tfm = NULL; 3631b411b363SPhilipp Reisner goto disconnect; 3632b411b363SPhilipp Reisner } 3633b411b363SPhilipp Reisner } 3634b411b363SPhilipp Reisner 36352ec91e0eSPhilipp Reisner if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { 3636b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) { 3637d0180171SAndreas Gruenbacher drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", 36382ec91e0eSPhilipp Reisner old_net_conf->csums_alg, p->csums_alg); 3639b411b363SPhilipp Reisner goto disconnect; 3640b411b363SPhilipp Reisner } 3641b30ab791SAndreas Gruenbacher csums_tfm = drbd_crypto_alloc_digest_safe(device, 3642b411b363SPhilipp Reisner p->csums_alg, "csums-alg"); 3643b411b363SPhilipp Reisner if (IS_ERR(csums_tfm)) { 3644b411b363SPhilipp Reisner csums_tfm = NULL; 3645b411b363SPhilipp Reisner goto disconnect; 3646b411b363SPhilipp Reisner } 3647b411b363SPhilipp Reisner } 3648b411b363SPhilipp Reisner 3649813472ceSPhilipp Reisner if (apv > 94 && new_disk_conf) { 3650daeda1ccSPhilipp Reisner new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); 3651daeda1ccSPhilipp Reisner new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); 3652daeda1ccSPhilipp Reisner new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); 3653daeda1ccSPhilipp Reisner new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); 3654778f271dSPhilipp Reisner 3655daeda1ccSPhilipp Reisner fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; 3656b30ab791SAndreas Gruenbacher if (fifo_size != device->rs_plan_s->size) { 
3657813472ceSPhilipp Reisner new_plan = fifo_alloc(fifo_size); 3658813472ceSPhilipp Reisner if (!new_plan) { 3659d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of fifo_buffer failed"); 3660b30ab791SAndreas Gruenbacher put_ldev(device); 3661778f271dSPhilipp Reisner goto disconnect; 3662778f271dSPhilipp Reisner } 3663778f271dSPhilipp Reisner } 36648e26f9ccSPhilipp Reisner } 3665b411b363SPhilipp Reisner 366691fd4dadSPhilipp Reisner if (verify_tfm || csums_tfm) { 36672ec91e0eSPhilipp Reisner new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 36682ec91e0eSPhilipp Reisner if (!new_net_conf) { 3669d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new net_conf failed\n"); 367091fd4dadSPhilipp Reisner goto disconnect; 367191fd4dadSPhilipp Reisner } 367291fd4dadSPhilipp Reisner 36732ec91e0eSPhilipp Reisner *new_net_conf = *old_net_conf; 367491fd4dadSPhilipp Reisner 3675b411b363SPhilipp Reisner if (verify_tfm) { 36762ec91e0eSPhilipp Reisner strcpy(new_net_conf->verify_alg, p->verify_alg); 36772ec91e0eSPhilipp Reisner new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; 36789f4fe9adSAndreas Gruenbacher crypto_free_hash(peer_device->connection->verify_tfm); 36799f4fe9adSAndreas Gruenbacher peer_device->connection->verify_tfm = verify_tfm; 3680d0180171SAndreas Gruenbacher drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); 3681b411b363SPhilipp Reisner } 3682b411b363SPhilipp Reisner if (csums_tfm) { 36832ec91e0eSPhilipp Reisner strcpy(new_net_conf->csums_alg, p->csums_alg); 36842ec91e0eSPhilipp Reisner new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; 36859f4fe9adSAndreas Gruenbacher crypto_free_hash(peer_device->connection->csums_tfm); 36869f4fe9adSAndreas Gruenbacher peer_device->connection->csums_tfm = csums_tfm; 3687d0180171SAndreas Gruenbacher drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); 3688b411b363SPhilipp Reisner } 3689bde89a9eSAndreas Gruenbacher rcu_assign_pointer(connection->net_conf, new_net_conf); 
3690778f271dSPhilipp Reisner } 3691b411b363SPhilipp Reisner } 3692b411b363SPhilipp Reisner 3693813472ceSPhilipp Reisner if (new_disk_conf) { 3694b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 3695b30ab791SAndreas Gruenbacher put_ldev(device); 3696b411b363SPhilipp Reisner } 3697813472ceSPhilipp Reisner 3698813472ceSPhilipp Reisner if (new_plan) { 3699b30ab791SAndreas Gruenbacher old_plan = device->rs_plan_s; 3700b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->rs_plan_s, new_plan); 3701813472ceSPhilipp Reisner } 3702daeda1ccSPhilipp Reisner 37030500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3704daeda1ccSPhilipp Reisner synchronize_rcu(); 3705daeda1ccSPhilipp Reisner if (new_net_conf) 3706daeda1ccSPhilipp Reisner kfree(old_net_conf); 3707daeda1ccSPhilipp Reisner kfree(old_disk_conf); 3708813472ceSPhilipp Reisner kfree(old_plan); 3709daeda1ccSPhilipp Reisner 371082bc0194SAndreas Gruenbacher return 0; 3711b411b363SPhilipp Reisner 3712813472ceSPhilipp Reisner reconnect: 3713813472ceSPhilipp Reisner if (new_disk_conf) { 3714b30ab791SAndreas Gruenbacher put_ldev(device); 3715813472ceSPhilipp Reisner kfree(new_disk_conf); 3716813472ceSPhilipp Reisner } 37170500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3718813472ceSPhilipp Reisner return -EIO; 3719813472ceSPhilipp Reisner 3720b411b363SPhilipp Reisner disconnect: 3721813472ceSPhilipp Reisner kfree(new_plan); 3722813472ceSPhilipp Reisner if (new_disk_conf) { 3723b30ab791SAndreas Gruenbacher put_ldev(device); 3724813472ceSPhilipp Reisner kfree(new_disk_conf); 3725813472ceSPhilipp Reisner } 37260500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3727b411b363SPhilipp Reisner /* just for completeness: actually not needed, 3728b411b363SPhilipp Reisner * as this is not reached if csums_tfm was ok. 
*/ 3729b411b363SPhilipp Reisner crypto_free_hash(csums_tfm); 3730b411b363SPhilipp Reisner /* but free the verify_tfm again, if csums_tfm did not work out */ 3731b411b363SPhilipp Reisner crypto_free_hash(verify_tfm); 37329f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 373382bc0194SAndreas Gruenbacher return -EIO; 3734b411b363SPhilipp Reisner } 3735b411b363SPhilipp Reisner 3736b411b363SPhilipp Reisner /* warn if the arguments differ by more than 12.5% */ 3737b30ab791SAndreas Gruenbacher static void warn_if_differ_considerably(struct drbd_device *device, 3738b411b363SPhilipp Reisner const char *s, sector_t a, sector_t b) 3739b411b363SPhilipp Reisner { 3740b411b363SPhilipp Reisner sector_t d; 3741b411b363SPhilipp Reisner if (a == 0 || b == 0) 3742b411b363SPhilipp Reisner return; 3743b411b363SPhilipp Reisner d = (a > b) ? (a - b) : (b - a); 3744b411b363SPhilipp Reisner if (d > (a>>3) || d > (b>>3)) 3745d0180171SAndreas Gruenbacher drbd_warn(device, "Considerable difference in %s: %llus vs. 
%llus\n", s, 3746b411b363SPhilipp Reisner (unsigned long long)a, (unsigned long long)b); 3747b411b363SPhilipp Reisner } 3748b411b363SPhilipp Reisner 3749bde89a9eSAndreas Gruenbacher static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi) 3750b411b363SPhilipp Reisner { 37519f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 3752b30ab791SAndreas Gruenbacher struct drbd_device *device; 3753e658983aSAndreas Gruenbacher struct p_sizes *p = pi->data; 3754e96c9633SPhilipp Reisner enum determine_dev_size dd = DS_UNCHANGED; 37556a8d68b1SLars Ellenberg sector_t p_size, p_usize, p_csize, my_usize; 3756b411b363SPhilipp Reisner int ldsc = 0; /* local disk size changed */ 3757e89b591cSPhilipp Reisner enum dds_flags ddsf; 3758b411b363SPhilipp Reisner 37599f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 37609f4fe9adSAndreas Gruenbacher if (!peer_device) 3761bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 37629f4fe9adSAndreas Gruenbacher device = peer_device->device; 37634a76b161SAndreas Gruenbacher 3764b411b363SPhilipp Reisner p_size = be64_to_cpu(p->d_size); 3765b411b363SPhilipp Reisner p_usize = be64_to_cpu(p->u_size); 37666a8d68b1SLars Ellenberg p_csize = be64_to_cpu(p->c_size); 3767b411b363SPhilipp Reisner 3768b411b363SPhilipp Reisner /* just store the peer's disk size for now. 3769b411b363SPhilipp Reisner * we still need to figure out whether we accept that. 
*/ 3770b30ab791SAndreas Gruenbacher device->p_size = p_size; 3771b411b363SPhilipp Reisner 3772b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3773daeda1ccSPhilipp Reisner rcu_read_lock(); 3774b30ab791SAndreas Gruenbacher my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; 3775daeda1ccSPhilipp Reisner rcu_read_unlock(); 3776daeda1ccSPhilipp Reisner 3777b30ab791SAndreas Gruenbacher warn_if_differ_considerably(device, "lower level device sizes", 3778b30ab791SAndreas Gruenbacher p_size, drbd_get_max_capacity(device->ldev)); 3779b30ab791SAndreas Gruenbacher warn_if_differ_considerably(device, "user requested size", 3780daeda1ccSPhilipp Reisner p_usize, my_usize); 3781b411b363SPhilipp Reisner 3782b411b363SPhilipp Reisner /* if this is the first connect, or an otherwise expected 3783b411b363SPhilipp Reisner * param exchange, choose the minimum */ 3784b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_REPORT_PARAMS) 3785daeda1ccSPhilipp Reisner p_usize = min_not_zero(my_usize, p_usize); 3786b411b363SPhilipp Reisner 3787b411b363SPhilipp Reisner /* Never shrink a device with usable data during connect. 3788b411b363SPhilipp Reisner But allow online shrinking if we are connected. 
*/ 3789b30ab791SAndreas Gruenbacher if (drbd_new_dev_size(device, device->ldev, p_usize, 0) < 3790b30ab791SAndreas Gruenbacher drbd_get_capacity(device->this_bdev) && 3791b30ab791SAndreas Gruenbacher device->state.disk >= D_OUTDATED && 3792b30ab791SAndreas Gruenbacher device->state.conn < C_CONNECTED) { 3793d0180171SAndreas Gruenbacher drbd_err(device, "The peer's disk size is too small!\n"); 37949f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 3795b30ab791SAndreas Gruenbacher put_ldev(device); 379682bc0194SAndreas Gruenbacher return -EIO; 3797b411b363SPhilipp Reisner } 3798daeda1ccSPhilipp Reisner 3799daeda1ccSPhilipp Reisner if (my_usize != p_usize) { 3800daeda1ccSPhilipp Reisner struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 3801daeda1ccSPhilipp Reisner 3802daeda1ccSPhilipp Reisner new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); 3803daeda1ccSPhilipp Reisner if (!new_disk_conf) { 3804d0180171SAndreas Gruenbacher drbd_err(device, "Allocation of new disk_conf failed\n"); 3805b30ab791SAndreas Gruenbacher put_ldev(device); 3806daeda1ccSPhilipp Reisner return -ENOMEM; 3807daeda1ccSPhilipp Reisner } 3808daeda1ccSPhilipp Reisner 38090500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 3810b30ab791SAndreas Gruenbacher old_disk_conf = device->ldev->disk_conf; 3811daeda1ccSPhilipp Reisner *new_disk_conf = *old_disk_conf; 3812daeda1ccSPhilipp Reisner new_disk_conf->disk_size = p_usize; 3813daeda1ccSPhilipp Reisner 3814b30ab791SAndreas Gruenbacher rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 38150500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 3816daeda1ccSPhilipp Reisner synchronize_rcu(); 3817daeda1ccSPhilipp Reisner kfree(old_disk_conf); 3818daeda1ccSPhilipp Reisner 3819d0180171SAndreas Gruenbacher drbd_info(device, "Peer sets u_size to %lu sectors\n", 3820daeda1ccSPhilipp Reisner (unsigned long)my_usize); 
3821daeda1ccSPhilipp Reisner } 3822daeda1ccSPhilipp Reisner 3823b30ab791SAndreas Gruenbacher put_ldev(device); 3824b411b363SPhilipp Reisner } 3825b411b363SPhilipp Reisner 382620c68fdeSLars Ellenberg device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); 382720c68fdeSLars Ellenberg /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). 382820c68fdeSLars Ellenberg In case we cleared the QUEUE_FLAG_DISCARD from our queue in 382920c68fdeSLars Ellenberg drbd_reconsider_max_bio_size(), we can be sure that after 383020c68fdeSLars Ellenberg drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ 383120c68fdeSLars Ellenberg 3832e89b591cSPhilipp Reisner ddsf = be16_to_cpu(p->dds_flags); 3833b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 38348fe39aacSPhilipp Reisner drbd_reconsider_max_bio_size(device, device->ldev); 3835b30ab791SAndreas Gruenbacher dd = drbd_determine_dev_size(device, ddsf, NULL); 3836b30ab791SAndreas Gruenbacher put_ldev(device); 3837e96c9633SPhilipp Reisner if (dd == DS_ERROR) 383882bc0194SAndreas Gruenbacher return -EIO; 3839b30ab791SAndreas Gruenbacher drbd_md_sync(device); 3840b411b363SPhilipp Reisner } else { 38416a8d68b1SLars Ellenberg /* 38426a8d68b1SLars Ellenberg * I am diskless, need to accept the peer's *current* size. 38436a8d68b1SLars Ellenberg * I must NOT accept the peers backing disk size, 38446a8d68b1SLars Ellenberg * it may have been larger than mine all along... 38456a8d68b1SLars Ellenberg * 38466a8d68b1SLars Ellenberg * At this point, the peer knows more about my disk, or at 38476a8d68b1SLars Ellenberg * least about what we last agreed upon, than myself. 38486a8d68b1SLars Ellenberg * So if his c_size is less than his d_size, the most likely 38496a8d68b1SLars Ellenberg * reason is that *my* d_size was smaller last time we checked. 
38506a8d68b1SLars Ellenberg * 38516a8d68b1SLars Ellenberg * However, if he sends a zero current size, 38526a8d68b1SLars Ellenberg * take his (user-capped or) backing disk size anyways. 38536a8d68b1SLars Ellenberg */ 38548fe39aacSPhilipp Reisner drbd_reconsider_max_bio_size(device, NULL); 38556a8d68b1SLars Ellenberg drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); 3856b411b363SPhilipp Reisner } 3857b411b363SPhilipp Reisner 3858b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3859b30ab791SAndreas Gruenbacher if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) { 3860b30ab791SAndreas Gruenbacher device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 3861b411b363SPhilipp Reisner ldsc = 1; 3862b411b363SPhilipp Reisner } 3863b411b363SPhilipp Reisner 3864b30ab791SAndreas Gruenbacher put_ldev(device); 3865b411b363SPhilipp Reisner } 3866b411b363SPhilipp Reisner 3867b30ab791SAndreas Gruenbacher if (device->state.conn > C_WF_REPORT_PARAMS) { 3868b411b363SPhilipp Reisner if (be64_to_cpu(p->c_size) != 3869b30ab791SAndreas Gruenbacher drbd_get_capacity(device->this_bdev) || ldsc) { 3870b411b363SPhilipp Reisner /* we have different sizes, probably peer 3871b411b363SPhilipp Reisner * needs to know my new size... 
*/ 387269a22773SAndreas Gruenbacher drbd_send_sizes(peer_device, 0, ddsf); 3873b411b363SPhilipp Reisner } 3874b30ab791SAndreas Gruenbacher if (test_and_clear_bit(RESIZE_PENDING, &device->flags) || 3875b30ab791SAndreas Gruenbacher (dd == DS_GREW && device->state.conn == C_CONNECTED)) { 3876b30ab791SAndreas Gruenbacher if (device->state.pdsk >= D_INCONSISTENT && 3877b30ab791SAndreas Gruenbacher device->state.disk >= D_INCONSISTENT) { 3878e89b591cSPhilipp Reisner if (ddsf & DDSF_NO_RESYNC) 3879d0180171SAndreas Gruenbacher drbd_info(device, "Resync of new storage suppressed with --assume-clean\n"); 3880b411b363SPhilipp Reisner else 3881b30ab791SAndreas Gruenbacher resync_after_online_grow(device); 3882e89b591cSPhilipp Reisner } else 3883b30ab791SAndreas Gruenbacher set_bit(RESYNC_AFTER_NEG, &device->flags); 3884b411b363SPhilipp Reisner } 3885b411b363SPhilipp Reisner } 3886b411b363SPhilipp Reisner 388782bc0194SAndreas Gruenbacher return 0; 3888b411b363SPhilipp Reisner } 3889b411b363SPhilipp Reisner 3890bde89a9eSAndreas Gruenbacher static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi) 3891b411b363SPhilipp Reisner { 38929f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 3893b30ab791SAndreas Gruenbacher struct drbd_device *device; 3894e658983aSAndreas Gruenbacher struct p_uuids *p = pi->data; 3895b411b363SPhilipp Reisner u64 *p_uuid; 389662b0da3aSLars Ellenberg int i, updated_uuids = 0; 3897b411b363SPhilipp Reisner 38989f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 38999f4fe9adSAndreas Gruenbacher if (!peer_device) 3900bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 39019f4fe9adSAndreas Gruenbacher device = peer_device->device; 39024a76b161SAndreas Gruenbacher 3903b411b363SPhilipp Reisner p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); 3904063eacf8SJing Wang if (!p_uuid) { 3905d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of p_uuid failed\n"); 
3906063eacf8SJing Wang return false; 3907063eacf8SJing Wang } 3908b411b363SPhilipp Reisner 3909b411b363SPhilipp Reisner for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 3910b411b363SPhilipp Reisner p_uuid[i] = be64_to_cpu(p->uuid[i]); 3911b411b363SPhilipp Reisner 3912b30ab791SAndreas Gruenbacher kfree(device->p_uuid); 3913b30ab791SAndreas Gruenbacher device->p_uuid = p_uuid; 3914b411b363SPhilipp Reisner 3915b30ab791SAndreas Gruenbacher if (device->state.conn < C_CONNECTED && 3916b30ab791SAndreas Gruenbacher device->state.disk < D_INCONSISTENT && 3917b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY && 3918b30ab791SAndreas Gruenbacher (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 3919d0180171SAndreas Gruenbacher drbd_err(device, "Can only connect to data with current UUID=%016llX\n", 3920b30ab791SAndreas Gruenbacher (unsigned long long)device->ed_uuid); 39219f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 392282bc0194SAndreas Gruenbacher return -EIO; 3923b411b363SPhilipp Reisner } 3924b411b363SPhilipp Reisner 3925b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 3926b411b363SPhilipp Reisner int skip_initial_sync = 3927b30ab791SAndreas Gruenbacher device->state.conn == C_CONNECTED && 39289f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90 && 3929b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 3930b411b363SPhilipp Reisner (p_uuid[UI_FLAGS] & 8); 3931b411b363SPhilipp Reisner if (skip_initial_sync) { 3932d0180171SAndreas Gruenbacher drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); 3933b30ab791SAndreas Gruenbacher drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 393420ceb2b2SLars Ellenberg "clear_n_write from receive_uuids", 393520ceb2b2SLars Ellenberg BM_LOCKED_TEST_ALLOWED); 3936b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); 
3937b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0); 3938b30ab791SAndreas Gruenbacher _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3939b411b363SPhilipp Reisner CS_VERBOSE, NULL); 3940b30ab791SAndreas Gruenbacher drbd_md_sync(device); 394162b0da3aSLars Ellenberg updated_uuids = 1; 3942b411b363SPhilipp Reisner } 3943b30ab791SAndreas Gruenbacher put_ldev(device); 3944b30ab791SAndreas Gruenbacher } else if (device->state.disk < D_INCONSISTENT && 3945b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY) { 394618a50fa2SPhilipp Reisner /* I am a diskless primary, the peer just created a new current UUID 394718a50fa2SPhilipp Reisner for me. */ 3948b30ab791SAndreas Gruenbacher updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 3949b411b363SPhilipp Reisner } 3950b411b363SPhilipp Reisner 3951b411b363SPhilipp Reisner /* Before we test for the disk state, we should wait until an eventually 3952b411b363SPhilipp Reisner ongoing cluster wide state change is finished. That is important if 3953b411b363SPhilipp Reisner we are primary and are detaching from our disk. We need to see the 3954b411b363SPhilipp Reisner new disk state... 
*/ 3955b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 3956b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 3957b30ab791SAndreas Gruenbacher if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) 3958b30ab791SAndreas Gruenbacher updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 395962b0da3aSLars Ellenberg 396062b0da3aSLars Ellenberg if (updated_uuids) 3961b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "receiver updated UUIDs to"); 3962b411b363SPhilipp Reisner 396382bc0194SAndreas Gruenbacher return 0; 3964b411b363SPhilipp Reisner } 3965b411b363SPhilipp Reisner 3966b411b363SPhilipp Reisner /** 3967b411b363SPhilipp Reisner * convert_state() - Converts the peer's view of the cluster state to our point of view 3968b411b363SPhilipp Reisner * @ps: The state as seen by the peer. 3969b411b363SPhilipp Reisner */ 3970b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps) 3971b411b363SPhilipp Reisner { 3972b411b363SPhilipp Reisner union drbd_state ms; 3973b411b363SPhilipp Reisner 3974b411b363SPhilipp Reisner static enum drbd_conns c_tab[] = { 3975369bea63SPhilipp Reisner [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, 3976b411b363SPhilipp Reisner [C_CONNECTED] = C_CONNECTED, 3977b411b363SPhilipp Reisner 3978b411b363SPhilipp Reisner [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 3979b411b363SPhilipp Reisner [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, 3980b411b363SPhilipp Reisner [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ 3981b411b363SPhilipp Reisner [C_VERIFY_S] = C_VERIFY_T, 3982b411b363SPhilipp Reisner [C_MASK] = C_MASK, 3983b411b363SPhilipp Reisner }; 3984b411b363SPhilipp Reisner 3985b411b363SPhilipp Reisner ms.i = ps.i; 3986b411b363SPhilipp Reisner 3987b411b363SPhilipp Reisner ms.conn = c_tab[ps.conn]; 3988b411b363SPhilipp Reisner ms.peer = ps.role; 3989b411b363SPhilipp Reisner ms.role = ps.peer; 3990b411b363SPhilipp Reisner ms.pdsk = ps.disk; 3991b411b363SPhilipp Reisner 
ms.disk = ps.pdsk; 3992b411b363SPhilipp Reisner ms.peer_isp = (ps.aftr_isp | ps.user_isp); 3993b411b363SPhilipp Reisner 3994b411b363SPhilipp Reisner return ms; 3995b411b363SPhilipp Reisner } 3996b411b363SPhilipp Reisner 3997bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) 3998b411b363SPhilipp Reisner { 39999f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4000b30ab791SAndreas Gruenbacher struct drbd_device *device; 4001e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data; 4002b411b363SPhilipp Reisner union drbd_state mask, val; 4003bf885f8aSAndreas Gruenbacher enum drbd_state_rv rv; 4004b411b363SPhilipp Reisner 40059f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 40069f4fe9adSAndreas Gruenbacher if (!peer_device) 40074a76b161SAndreas Gruenbacher return -EIO; 40089f4fe9adSAndreas Gruenbacher device = peer_device->device; 40094a76b161SAndreas Gruenbacher 4010b411b363SPhilipp Reisner mask.i = be32_to_cpu(p->mask); 4011b411b363SPhilipp Reisner val.i = be32_to_cpu(p->val); 4012b411b363SPhilipp Reisner 40139f4fe9adSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && 4014b30ab791SAndreas Gruenbacher mutex_is_locked(device->state_mutex)) { 401569a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); 401682bc0194SAndreas Gruenbacher return 0; 4017b411b363SPhilipp Reisner } 4018b411b363SPhilipp Reisner 4019b411b363SPhilipp Reisner mask = convert_state(mask); 4020b411b363SPhilipp Reisner val = convert_state(val); 4021b411b363SPhilipp Reisner 4022b30ab791SAndreas Gruenbacher rv = drbd_change_state(device, CS_VERBOSE, mask, val); 402369a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, rv); 4024047cd4a6SPhilipp Reisner 4025b30ab791SAndreas Gruenbacher drbd_md_sync(device); 4026b411b363SPhilipp Reisner 402782bc0194SAndreas Gruenbacher return 0; 4028b411b363SPhilipp Reisner } 
/* Handle a P_CONN_ST_CHG_REQ packet: the peer asks for a connection-wide
 * (resource level) state change.  We apply it locally-only and reply with
 * the resulting status code. */
static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the node that resolves conflicts and a local state
	 * change is already in flight (cstate_mutex held), refuse the
	 * concurrent request instead of racing with it. */
	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* translate the peer's view of the state change into our view */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}

/* Handle a P_STATE packet: integrate the peer's reported state into our
 * own state (peer role, peer disk, sync decisions).  Returns 0 on success,
 * -ECONNRESET if the connection is already being torn down, -EIO on
 * unresolvable state conflicts (which also moves cstate to C_DISCONNECTING
 * or C_PROTOCOL_ERROR). */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* While the peer negotiates its disk we derive its effective
		 * disk state from the inconsistent-flag in its exchanged UUIDs. */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* snapshot the current state; we re-check below that it did not
	 * change while we were computing ns without the lock */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (ack_receiver thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer throttled itself to Ahead -> we become the Behind side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake means: no usable common
		 * ancestor / handshake failed; sort out who is at fault. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* state changed behind our back while we dropped the lock: redo */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}

/* Handle a P_SYNC_UUID packet: the peer tells us the sync UUID right before
 * it starts resync towards us.  We wait until our own state settled into
 * one of the states a SyncUUID is expected in, then adopt the UUID and
 * start resync as SyncTarget. */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

/**
 * receive_bitmap_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
/* Receive one uncompressed bitmap chunk and merge it into our bitmap at
 * the offset tracked in @c. */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	/* how many longs still fit into one packet / remain to transfer */
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	/* advance the transfer context; clamp bit_offset to the bitmap end */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}

/* Accessors for the packed p_compressed_bm::encoding byte:
 * low nibble = bitmap code, bit 7 = start value, bits 4-6 = pad bits. */
static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}

/**
 * recv_bm_rle_bits
 *
 * Decode a VLI run-length-encoded bitmap chunk: alternating runs of clear
 * and set bits, starting with dcbp_get_start(); set-runs are applied via
 * _drbd_bm_set_bits().
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* next (up to) 64 not-yet-decoded bits */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;	/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* a run code must never use more bits than we have buffered */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill look_ahead from the bitstream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done only once the whole bitmap has been decoded */
	return (s != c->bm_bits);
}

/**
 * decode_bitmap_c
 *
 * Dispatch on the compressed-bitmap encoding; only RLE_VLI_Bits is
 * supported, anything else is a protocol error.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests. */

	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

/* Log how well the bitmap transfer compressed (per mille saved),
 * but only if it actually was smaller than a plain transfer. */
void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total can not be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	/* r is now "per mille saved" */
	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
	     "total %u; compression: %u.%u%%\n",
			direction,
			c->bytes[1], c->packets[1],
			c->bytes[0], c->packets[0],
			total, r/10, r % 10);
}

/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it.
 */
/* Receive a full bitmap exchange (P_BITMAP / P_COMPRESSED_BITMAP packets,
 * looping over drbd_recv_header() until the transfer is complete), then,
 * depending on our role, reply with our bitmap (WFBitMapT) or start resync
 * as SyncSource (WFBitMapS). */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* index 1 counts plain, index 0 counts compressed packets */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		/* err == 0: transfer done; err > 0: another packet follows */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

/* Fallback handler: log and drain an unknown-but-optional packet. */
static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		 pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

/* Handle a P_OUT_OF_SYNC packet: the peer tells us a block range is out of
 * sync; mark it in our bitmap.  Only expected in the listed cstates, but
 * the marking is done regardless (the default case just complains). */
static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}

/* One entry per data-socket packet type: whether a payload beyond the
 * fixed sub-header is allowed, the sub-header size, and the handler. */
struct data_cmd {
	int expect_payload;
	size_t pkt_size;
	int (*fn)(struct drbd_connection *, struct packet_info *);
};

static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};

/* Main receive loop of the receiver thread: read a packet header, look up
 * the handler in drbd_cmd_handler[], read the fixed sub-header, and call
 * the handler.  Any error ends the loop with conn state C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_connection *connection)
{
	struct packet_info pi;
	size_t shs; /* sub header size */
	int err;

	while (get_t_state(&connection->receiver) == RUNNING) {
		struct data_cmd *cmd;

		drbd_thread_current_set_cpu(&connection->receiver);
		update_receiver_timing_details(connection, drbd_recv_header);
		if (drbd_recv_header(connection, &pi))
			goto err_out;

		/* note: only the address is taken here; the bounds check
		 * below short-circuits before cmd->fn is read for an
		 * out-of-range pi.cmd */
		cmd = &drbd_cmd_handler[pi.cmd];
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
				 cmdname(pi.cmd), pi.cmd);
			goto err_out;
		}

		shs = cmd->pkt_size;
		if (pi.size > shs && !cmd->expect_payload) {
			drbd_err(connection, "No payload expected %s l:%d\n",
				 cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			update_receiver_timing_details(connection, drbd_recv_all_warn);
			err = drbd_recv_all_warn(connection, pi.data, shs);
			if (err)
				goto err_out;
			/* pi.size now holds only the remaining payload size */
			pi.size -= shs;
		}

		update_receiver_timing_details(connection, cmd->fn);
		err = cmd->fn(connection, &pi);
		if (err) {
			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
				 cmdname(pi.cmd), err, pi.size);
			goto err_out;
		}
	}
	return;

    err_out:
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}

static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
4681545752d5SLars Ellenberg * Make sure drbd_make_request knows about that. 4682545752d5SLars Ellenberg * Usually we should be in some network failure state already, 4683545752d5SLars Ellenberg * but just in case we are not, we fix it up here. 4684545752d5SLars Ellenberg */ 4685bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 4686545752d5SLars Ellenberg 4687668700b4SPhilipp Reisner /* ack_receiver does not clean up anything. it must not interfere, either */ 46881c03e520SPhilipp Reisner drbd_thread_stop(&connection->ack_receiver); 4689668700b4SPhilipp Reisner if (connection->ack_sender) { 4690668700b4SPhilipp Reisner destroy_workqueue(connection->ack_sender); 4691668700b4SPhilipp Reisner connection->ack_sender = NULL; 4692668700b4SPhilipp Reisner } 4693bde89a9eSAndreas Gruenbacher drbd_free_sock(connection); 4694360cc740SPhilipp Reisner 4695c141ebdaSPhilipp Reisner rcu_read_lock(); 4696c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 4697c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 4698b30ab791SAndreas Gruenbacher kref_get(&device->kref); 4699c141ebdaSPhilipp Reisner rcu_read_unlock(); 470069a22773SAndreas Gruenbacher drbd_disconnected(peer_device); 4701c06ece6bSAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 4702c141ebdaSPhilipp Reisner rcu_read_lock(); 4703c141ebdaSPhilipp Reisner } 4704c141ebdaSPhilipp Reisner rcu_read_unlock(); 4705c141ebdaSPhilipp Reisner 4706bde89a9eSAndreas Gruenbacher if (!list_empty(&connection->current_epoch->list)) 47071ec861ebSAndreas Gruenbacher drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n"); 470812038a3aSPhilipp Reisner /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ 4709bde89a9eSAndreas Gruenbacher atomic_set(&connection->current_epoch->epoch_size, 0); 4710bde89a9eSAndreas Gruenbacher connection->send.seen_any_write_yet = false; 
471112038a3aSPhilipp Reisner 47121ec861ebSAndreas Gruenbacher drbd_info(connection, "Connection closed\n"); 4713360cc740SPhilipp Reisner 4714bde89a9eSAndreas Gruenbacher if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN) 4715bde89a9eSAndreas Gruenbacher conn_try_outdate_peer_async(connection); 4716cb703454SPhilipp Reisner 47170500813fSAndreas Gruenbacher spin_lock_irq(&connection->resource->req_lock); 4718bde89a9eSAndreas Gruenbacher oc = connection->cstate; 4719bbeb641cSPhilipp Reisner if (oc >= C_UNCONNECTED) 4720bde89a9eSAndreas Gruenbacher _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 4721bbeb641cSPhilipp Reisner 47220500813fSAndreas Gruenbacher spin_unlock_irq(&connection->resource->req_lock); 4723360cc740SPhilipp Reisner 4724f3dfa40aSLars Ellenberg if (oc == C_DISCONNECTING) 4725bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); 4726360cc740SPhilipp Reisner } 4727360cc740SPhilipp Reisner 472869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *peer_device) 4729360cc740SPhilipp Reisner { 473069a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 4731360cc740SPhilipp Reisner unsigned int i; 4732b411b363SPhilipp Reisner 473385719573SPhilipp Reisner /* wait for current activity to cease. */ 47340500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 4735b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->active_ee); 4736b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->sync_ee); 4737b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, &device->read_ee); 47380500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4739b411b363SPhilipp Reisner 4740b411b363SPhilipp Reisner /* We do not have data structures that would allow us to 4741b411b363SPhilipp Reisner * get the rs_pending_cnt down to 0 again. 
4742b411b363SPhilipp Reisner * * On C_SYNC_TARGET we do not have any data structures describing 4743b411b363SPhilipp Reisner * the pending RSDataRequest's we have sent. 4744b411b363SPhilipp Reisner * * On C_SYNC_SOURCE there is no data structure that tracks 4745b411b363SPhilipp Reisner * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. 4746b411b363SPhilipp Reisner * And no, it is not the sum of the reference counts in the 4747b411b363SPhilipp Reisner * resync_LRU. The resync_LRU tracks the whole operation including 4748b411b363SPhilipp Reisner * the disk-IO, while the rs_pending_cnt only tracks the blocks 4749b411b363SPhilipp Reisner * on the fly. */ 4750b30ab791SAndreas Gruenbacher drbd_rs_cancel_all(device); 4751b30ab791SAndreas Gruenbacher device->rs_total = 0; 4752b30ab791SAndreas Gruenbacher device->rs_failed = 0; 4753b30ab791SAndreas Gruenbacher atomic_set(&device->rs_pending_cnt, 0); 4754b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 4755b411b363SPhilipp Reisner 4756b30ab791SAndreas Gruenbacher del_timer_sync(&device->resync_timer); 4757b30ab791SAndreas Gruenbacher resync_timer_fn((unsigned long)device); 4758b411b363SPhilipp Reisner 4759b411b363SPhilipp Reisner /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 4760b411b363SPhilipp Reisner * w_make_resync_request etc. which may still be on the worker queue 4761b411b363SPhilipp Reisner * to be "canceled" */ 4762b5043c5eSAndreas Gruenbacher drbd_flush_workqueue(&peer_device->connection->sender_work); 4763b411b363SPhilipp Reisner 4764b30ab791SAndreas Gruenbacher drbd_finish_peer_reqs(device); 4765b411b363SPhilipp Reisner 4766d10b4ea3SPhilipp Reisner /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() 4767d10b4ea3SPhilipp Reisner might have issued a work again. The one before drbd_finish_peer_reqs() is 4768d10b4ea3SPhilipp Reisner necessary to reclain net_ee in drbd_finish_peer_reqs(). 
*/ 4769b5043c5eSAndreas Gruenbacher drbd_flush_workqueue(&peer_device->connection->sender_work); 4770d10b4ea3SPhilipp Reisner 477108332d73SLars Ellenberg /* need to do it again, drbd_finish_peer_reqs() may have populated it 477208332d73SLars Ellenberg * again via drbd_try_clear_on_disk_bm(). */ 4773b30ab791SAndreas Gruenbacher drbd_rs_cancel_all(device); 4774b411b363SPhilipp Reisner 4775b30ab791SAndreas Gruenbacher kfree(device->p_uuid); 4776b30ab791SAndreas Gruenbacher device->p_uuid = NULL; 4777b411b363SPhilipp Reisner 4778b30ab791SAndreas Gruenbacher if (!drbd_suspended(device)) 477969a22773SAndreas Gruenbacher tl_clear(peer_device->connection); 4780b411b363SPhilipp Reisner 4781b30ab791SAndreas Gruenbacher drbd_md_sync(device); 4782b411b363SPhilipp Reisner 478320ceb2b2SLars Ellenberg /* serialize with bitmap writeout triggered by the state change, 478420ceb2b2SLars Ellenberg * if any. */ 4785b30ab791SAndreas Gruenbacher wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 478620ceb2b2SLars Ellenberg 4787b411b363SPhilipp Reisner /* tcp_close and release of sendpage pages can be deferred. I don't 4788b411b363SPhilipp Reisner * want to use SO_LINGER, because apparently it can be deferred for 4789b411b363SPhilipp Reisner * more than 20 seconds (longest time I checked). 4790b411b363SPhilipp Reisner * 4791b411b363SPhilipp Reisner * Actually we don't care for exactly when the network stack does its 4792b411b363SPhilipp Reisner * put_page(), but release our reference on these pages right here. 
4793b411b363SPhilipp Reisner */ 4794b30ab791SAndreas Gruenbacher i = drbd_free_peer_reqs(device, &device->net_ee); 4795b411b363SPhilipp Reisner if (i) 4796d0180171SAndreas Gruenbacher drbd_info(device, "net_ee not empty, killed %u entries\n", i); 4797b30ab791SAndreas Gruenbacher i = atomic_read(&device->pp_in_use_by_net); 4798435f0740SLars Ellenberg if (i) 4799d0180171SAndreas Gruenbacher drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); 4800b30ab791SAndreas Gruenbacher i = atomic_read(&device->pp_in_use); 4801b411b363SPhilipp Reisner if (i) 4802d0180171SAndreas Gruenbacher drbd_info(device, "pp_in_use = %d, expected 0\n", i); 4803b411b363SPhilipp Reisner 48040b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->read_ee)); 48050b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->active_ee)); 48060b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->sync_ee)); 48070b0ba1efSAndreas Gruenbacher D_ASSERT(device, list_empty(&device->done_ee)); 4808b411b363SPhilipp Reisner 4809360cc740SPhilipp Reisner return 0; 4810b411b363SPhilipp Reisner } 4811b411b363SPhilipp Reisner 4812b411b363SPhilipp Reisner /* 4813b411b363SPhilipp Reisner * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version 4814b411b363SPhilipp Reisner * we can agree on is stored in agreed_pro_version. 4815b411b363SPhilipp Reisner * 4816b411b363SPhilipp Reisner * feature flags and the reserved array should be enough room for future 4817b411b363SPhilipp Reisner * enhancements of the handshake protocol, and possible plugins... 4818b411b363SPhilipp Reisner * 4819b411b363SPhilipp Reisner * for now, they are expected to be zero, but ignored. 
4820b411b363SPhilipp Reisner */ 4821bde89a9eSAndreas Gruenbacher static int drbd_send_features(struct drbd_connection *connection) 4822b411b363SPhilipp Reisner { 48239f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 48249f5bdc33SAndreas Gruenbacher struct p_connection_features *p; 4825b411b363SPhilipp Reisner 4826bde89a9eSAndreas Gruenbacher sock = &connection->data; 4827bde89a9eSAndreas Gruenbacher p = conn_prepare_command(connection, sock); 48289f5bdc33SAndreas Gruenbacher if (!p) 4829e8d17b01SAndreas Gruenbacher return -EIO; 4830b411b363SPhilipp Reisner memset(p, 0, sizeof(*p)); 4831b411b363SPhilipp Reisner p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 4832b411b363SPhilipp Reisner p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 483320c68fdeSLars Ellenberg p->feature_flags = cpu_to_be32(PRO_FEATURES); 4834bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); 4835b411b363SPhilipp Reisner } 4836b411b363SPhilipp Reisner 4837b411b363SPhilipp Reisner /* 4838b411b363SPhilipp Reisner * return values: 4839b411b363SPhilipp Reisner * 1 yes, we have a valid connection 4840b411b363SPhilipp Reisner * 0 oops, did not work out, please try again 4841b411b363SPhilipp Reisner * -1 peer talks different language, 4842b411b363SPhilipp Reisner * no point in trying again, please go standalone. 4843b411b363SPhilipp Reisner */ 4844bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection) 4845b411b363SPhilipp Reisner { 4846bde89a9eSAndreas Gruenbacher /* ASSERT current == connection->receiver ... 
*/ 4847e658983aSAndreas Gruenbacher struct p_connection_features *p; 4848e658983aSAndreas Gruenbacher const int expect = sizeof(struct p_connection_features); 484977351055SPhilipp Reisner struct packet_info pi; 4850a5c31904SAndreas Gruenbacher int err; 4851b411b363SPhilipp Reisner 4852bde89a9eSAndreas Gruenbacher err = drbd_send_features(connection); 4853e8d17b01SAndreas Gruenbacher if (err) 4854b411b363SPhilipp Reisner return 0; 4855b411b363SPhilipp Reisner 4856bde89a9eSAndreas Gruenbacher err = drbd_recv_header(connection, &pi); 485769bc7bc3SAndreas Gruenbacher if (err) 4858b411b363SPhilipp Reisner return 0; 4859b411b363SPhilipp Reisner 48606038178eSAndreas Gruenbacher if (pi.cmd != P_CONNECTION_FEATURES) { 48611ec861ebSAndreas Gruenbacher drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", 486277351055SPhilipp Reisner cmdname(pi.cmd), pi.cmd); 4863b411b363SPhilipp Reisner return -1; 4864b411b363SPhilipp Reisner } 4865b411b363SPhilipp Reisner 486677351055SPhilipp Reisner if (pi.size != expect) { 48671ec861ebSAndreas Gruenbacher drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n", 486877351055SPhilipp Reisner expect, pi.size); 4869b411b363SPhilipp Reisner return -1; 4870b411b363SPhilipp Reisner } 4871b411b363SPhilipp Reisner 4872e658983aSAndreas Gruenbacher p = pi.data; 4873bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, p, expect); 4874a5c31904SAndreas Gruenbacher if (err) 4875b411b363SPhilipp Reisner return 0; 4876b411b363SPhilipp Reisner 4877b411b363SPhilipp Reisner p->protocol_min = be32_to_cpu(p->protocol_min); 4878b411b363SPhilipp Reisner p->protocol_max = be32_to_cpu(p->protocol_max); 4879b411b363SPhilipp Reisner if (p->protocol_max == 0) 4880b411b363SPhilipp Reisner p->protocol_max = p->protocol_min; 4881b411b363SPhilipp Reisner 4882b411b363SPhilipp Reisner if (PRO_VERSION_MAX < p->protocol_min || 4883b411b363SPhilipp Reisner PRO_VERSION_MIN > p->protocol_max) 4884b411b363SPhilipp 
Reisner goto incompat; 4885b411b363SPhilipp Reisner 4886bde89a9eSAndreas Gruenbacher connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); 488720c68fdeSLars Ellenberg connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags); 4888b411b363SPhilipp Reisner 48891ec861ebSAndreas Gruenbacher drbd_info(connection, "Handshake successful: " 4890bde89a9eSAndreas Gruenbacher "Agreed network protocol version %d\n", connection->agreed_pro_version); 4891b411b363SPhilipp Reisner 489220c68fdeSLars Ellenberg drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n", 489320c68fdeSLars Ellenberg connection->agreed_features & FF_TRIM ? " " : " not "); 489420c68fdeSLars Ellenberg 4895b411b363SPhilipp Reisner return 1; 4896b411b363SPhilipp Reisner 4897b411b363SPhilipp Reisner incompat: 48981ec861ebSAndreas Gruenbacher drbd_err(connection, "incompatible DRBD dialects: " 4899b411b363SPhilipp Reisner "I support %d-%d, peer supports %d-%d\n", 4900b411b363SPhilipp Reisner PRO_VERSION_MIN, PRO_VERSION_MAX, 4901b411b363SPhilipp Reisner p->protocol_min, p->protocol_max); 4902b411b363SPhilipp Reisner return -1; 4903b411b363SPhilipp Reisner } 4904b411b363SPhilipp Reisner 4905b411b363SPhilipp Reisner #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) 4906bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection) 4907b411b363SPhilipp Reisner { 49081ec861ebSAndreas Gruenbacher drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); 49091ec861ebSAndreas Gruenbacher drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); 4910b10d96cbSJohannes Thoma return -1; 4911b411b363SPhilipp Reisner } 4912b411b363SPhilipp Reisner #else 4913b411b363SPhilipp Reisner #define CHALLENGE_LEN 64 4914b10d96cbSJohannes Thoma 4915b10d96cbSJohannes Thoma /* Return value: 4916b10d96cbSJohannes Thoma 1 - auth succeeded, 4917b10d96cbSJohannes Thoma 0 - failed, try again (network 
error), 4918b10d96cbSJohannes Thoma -1 - auth failed, don't try again. 4919b10d96cbSJohannes Thoma */ 4920b10d96cbSJohannes Thoma 4921bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection) 4922b411b363SPhilipp Reisner { 49239f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 4924b411b363SPhilipp Reisner char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ 4925b411b363SPhilipp Reisner struct scatterlist sg; 4926b411b363SPhilipp Reisner char *response = NULL; 4927b411b363SPhilipp Reisner char *right_response = NULL; 4928b411b363SPhilipp Reisner char *peers_ch = NULL; 492944ed167dSPhilipp Reisner unsigned int key_len; 493044ed167dSPhilipp Reisner char secret[SHARED_SECRET_MAX]; /* 64 byte */ 4931b411b363SPhilipp Reisner unsigned int resp_size; 4932b411b363SPhilipp Reisner struct hash_desc desc; 493377351055SPhilipp Reisner struct packet_info pi; 493444ed167dSPhilipp Reisner struct net_conf *nc; 493569bc7bc3SAndreas Gruenbacher int err, rv; 4936b411b363SPhilipp Reisner 49379f5bdc33SAndreas Gruenbacher /* FIXME: Put the challenge/response into the preallocated socket buffer. 
*/ 49389f5bdc33SAndreas Gruenbacher 493944ed167dSPhilipp Reisner rcu_read_lock(); 4940bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 494144ed167dSPhilipp Reisner key_len = strlen(nc->shared_secret); 494244ed167dSPhilipp Reisner memcpy(secret, nc->shared_secret, key_len); 494344ed167dSPhilipp Reisner rcu_read_unlock(); 494444ed167dSPhilipp Reisner 4945bde89a9eSAndreas Gruenbacher desc.tfm = connection->cram_hmac_tfm; 4946b411b363SPhilipp Reisner desc.flags = 0; 4947b411b363SPhilipp Reisner 4948bde89a9eSAndreas Gruenbacher rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); 4949b411b363SPhilipp Reisner if (rv) { 49501ec861ebSAndreas Gruenbacher drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv); 4951b10d96cbSJohannes Thoma rv = -1; 4952b411b363SPhilipp Reisner goto fail; 4953b411b363SPhilipp Reisner } 4954b411b363SPhilipp Reisner 4955b411b363SPhilipp Reisner get_random_bytes(my_challenge, CHALLENGE_LEN); 4956b411b363SPhilipp Reisner 4957bde89a9eSAndreas Gruenbacher sock = &connection->data; 4958bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) { 49599f5bdc33SAndreas Gruenbacher rv = 0; 49609f5bdc33SAndreas Gruenbacher goto fail; 49619f5bdc33SAndreas Gruenbacher } 4962bde89a9eSAndreas Gruenbacher rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0, 49639f5bdc33SAndreas Gruenbacher my_challenge, CHALLENGE_LEN); 4964b411b363SPhilipp Reisner if (!rv) 4965b411b363SPhilipp Reisner goto fail; 4966b411b363SPhilipp Reisner 4967bde89a9eSAndreas Gruenbacher err = drbd_recv_header(connection, &pi); 496869bc7bc3SAndreas Gruenbacher if (err) { 4969b411b363SPhilipp Reisner rv = 0; 4970b411b363SPhilipp Reisner goto fail; 4971b411b363SPhilipp Reisner } 4972b411b363SPhilipp Reisner 497377351055SPhilipp Reisner if (pi.cmd != P_AUTH_CHALLENGE) { 49741ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 497577351055SPhilipp Reisner 
cmdname(pi.cmd), pi.cmd); 4976b411b363SPhilipp Reisner rv = 0; 4977b411b363SPhilipp Reisner goto fail; 4978b411b363SPhilipp Reisner } 4979b411b363SPhilipp Reisner 498077351055SPhilipp Reisner if (pi.size > CHALLENGE_LEN * 2) { 49811ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthChallenge payload too big.\n"); 4982b10d96cbSJohannes Thoma rv = -1; 4983b411b363SPhilipp Reisner goto fail; 4984b411b363SPhilipp Reisner } 4985b411b363SPhilipp Reisner 498667cca286SPhilipp Reisner if (pi.size < CHALLENGE_LEN) { 498767cca286SPhilipp Reisner drbd_err(connection, "AuthChallenge payload too small.\n"); 498867cca286SPhilipp Reisner rv = -1; 498967cca286SPhilipp Reisner goto fail; 499067cca286SPhilipp Reisner } 499167cca286SPhilipp Reisner 499277351055SPhilipp Reisner peers_ch = kmalloc(pi.size, GFP_NOIO); 4993b411b363SPhilipp Reisner if (peers_ch == NULL) { 49941ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of peers_ch failed\n"); 4995b10d96cbSJohannes Thoma rv = -1; 4996b411b363SPhilipp Reisner goto fail; 4997b411b363SPhilipp Reisner } 4998b411b363SPhilipp Reisner 4999bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, peers_ch, pi.size); 5000a5c31904SAndreas Gruenbacher if (err) { 5001b411b363SPhilipp Reisner rv = 0; 5002b411b363SPhilipp Reisner goto fail; 5003b411b363SPhilipp Reisner } 5004b411b363SPhilipp Reisner 500567cca286SPhilipp Reisner if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) { 500667cca286SPhilipp Reisner drbd_err(connection, "Peer presented the same challenge!\n"); 500767cca286SPhilipp Reisner rv = -1; 500867cca286SPhilipp Reisner goto fail; 500967cca286SPhilipp Reisner } 501067cca286SPhilipp Reisner 5011bde89a9eSAndreas Gruenbacher resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm); 5012b411b363SPhilipp Reisner response = kmalloc(resp_size, GFP_NOIO); 5013b411b363SPhilipp Reisner if (response == NULL) { 50141ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of response failed\n"); 
5015b10d96cbSJohannes Thoma rv = -1; 5016b411b363SPhilipp Reisner goto fail; 5017b411b363SPhilipp Reisner } 5018b411b363SPhilipp Reisner 5019b411b363SPhilipp Reisner sg_init_table(&sg, 1); 502077351055SPhilipp Reisner sg_set_buf(&sg, peers_ch, pi.size); 5021b411b363SPhilipp Reisner 5022b411b363SPhilipp Reisner rv = crypto_hash_digest(&desc, &sg, sg.length, response); 5023b411b363SPhilipp Reisner if (rv) { 50241ec861ebSAndreas Gruenbacher drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 5025b10d96cbSJohannes Thoma rv = -1; 5026b411b363SPhilipp Reisner goto fail; 5027b411b363SPhilipp Reisner } 5028b411b363SPhilipp Reisner 5029bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) { 50309f5bdc33SAndreas Gruenbacher rv = 0; 50319f5bdc33SAndreas Gruenbacher goto fail; 50329f5bdc33SAndreas Gruenbacher } 5033bde89a9eSAndreas Gruenbacher rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0, 50349f5bdc33SAndreas Gruenbacher response, resp_size); 5035b411b363SPhilipp Reisner if (!rv) 5036b411b363SPhilipp Reisner goto fail; 5037b411b363SPhilipp Reisner 5038bde89a9eSAndreas Gruenbacher err = drbd_recv_header(connection, &pi); 503969bc7bc3SAndreas Gruenbacher if (err) { 5040b411b363SPhilipp Reisner rv = 0; 5041b411b363SPhilipp Reisner goto fail; 5042b411b363SPhilipp Reisner } 5043b411b363SPhilipp Reisner 504477351055SPhilipp Reisner if (pi.cmd != P_AUTH_RESPONSE) { 50451ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n", 504677351055SPhilipp Reisner cmdname(pi.cmd), pi.cmd); 5047b411b363SPhilipp Reisner rv = 0; 5048b411b363SPhilipp Reisner goto fail; 5049b411b363SPhilipp Reisner } 5050b411b363SPhilipp Reisner 505177351055SPhilipp Reisner if (pi.size != resp_size) { 50521ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthResponse payload of wrong size\n"); 5053b411b363SPhilipp Reisner rv = 0; 5054b411b363SPhilipp Reisner goto fail; 5055b411b363SPhilipp Reisner } 
5056b411b363SPhilipp Reisner 5057bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, response , resp_size); 5058a5c31904SAndreas Gruenbacher if (err) { 5059b411b363SPhilipp Reisner rv = 0; 5060b411b363SPhilipp Reisner goto fail; 5061b411b363SPhilipp Reisner } 5062b411b363SPhilipp Reisner 5063b411b363SPhilipp Reisner right_response = kmalloc(resp_size, GFP_NOIO); 50642d1ee87dSJulia Lawall if (right_response == NULL) { 50651ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of right_response failed\n"); 5066b10d96cbSJohannes Thoma rv = -1; 5067b411b363SPhilipp Reisner goto fail; 5068b411b363SPhilipp Reisner } 5069b411b363SPhilipp Reisner 5070b411b363SPhilipp Reisner sg_set_buf(&sg, my_challenge, CHALLENGE_LEN); 5071b411b363SPhilipp Reisner 5072b411b363SPhilipp Reisner rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); 5073b411b363SPhilipp Reisner if (rv) { 50741ec861ebSAndreas Gruenbacher drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 5075b10d96cbSJohannes Thoma rv = -1; 5076b411b363SPhilipp Reisner goto fail; 5077b411b363SPhilipp Reisner } 5078b411b363SPhilipp Reisner 5079b411b363SPhilipp Reisner rv = !memcmp(response, right_response, resp_size); 5080b411b363SPhilipp Reisner 5081b411b363SPhilipp Reisner if (rv) 50821ec861ebSAndreas Gruenbacher drbd_info(connection, "Peer authenticated using %d bytes HMAC\n", 508344ed167dSPhilipp Reisner resp_size); 5084b10d96cbSJohannes Thoma else 5085b10d96cbSJohannes Thoma rv = -1; 5086b411b363SPhilipp Reisner 5087b411b363SPhilipp Reisner fail: 5088b411b363SPhilipp Reisner kfree(peers_ch); 5089b411b363SPhilipp Reisner kfree(response); 5090b411b363SPhilipp Reisner kfree(right_response); 5091b411b363SPhilipp Reisner 5092b411b363SPhilipp Reisner return rv; 5093b411b363SPhilipp Reisner } 5094b411b363SPhilipp Reisner #endif 5095b411b363SPhilipp Reisner 50968fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi) 5097b411b363SPhilipp Reisner { 5098bde89a9eSAndreas 
Gruenbacher struct drbd_connection *connection = thi->connection; 5099b411b363SPhilipp Reisner int h; 5100b411b363SPhilipp Reisner 51011ec861ebSAndreas Gruenbacher drbd_info(connection, "receiver (re)started\n"); 5102b411b363SPhilipp Reisner 5103b411b363SPhilipp Reisner do { 5104bde89a9eSAndreas Gruenbacher h = conn_connect(connection); 5105b411b363SPhilipp Reisner if (h == 0) { 5106bde89a9eSAndreas Gruenbacher conn_disconnect(connection); 510720ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ); 5108b411b363SPhilipp Reisner } 5109b411b363SPhilipp Reisner if (h == -1) { 51101ec861ebSAndreas Gruenbacher drbd_warn(connection, "Discarding network configuration.\n"); 5111bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 5112b411b363SPhilipp Reisner } 5113b411b363SPhilipp Reisner } while (h == 0); 5114b411b363SPhilipp Reisner 511591fd4dadSPhilipp Reisner if (h > 0) 5116bde89a9eSAndreas Gruenbacher drbdd(connection); 5117b411b363SPhilipp Reisner 5118bde89a9eSAndreas Gruenbacher conn_disconnect(connection); 5119b411b363SPhilipp Reisner 51201ec861ebSAndreas Gruenbacher drbd_info(connection, "receiver terminated\n"); 5121b411b363SPhilipp Reisner return 0; 5122b411b363SPhilipp Reisner } 5123b411b363SPhilipp Reisner 5124b411b363SPhilipp Reisner /* ********* acknowledge sender ******** */ 5125b411b363SPhilipp Reisner 5126bde89a9eSAndreas Gruenbacher static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 5127b411b363SPhilipp Reisner { 5128e658983aSAndreas Gruenbacher struct p_req_state_reply *p = pi->data; 5129b411b363SPhilipp Reisner int retcode = be32_to_cpu(p->retcode); 5130b411b363SPhilipp Reisner 5131b411b363SPhilipp Reisner if (retcode >= SS_SUCCESS) { 5132bde89a9eSAndreas Gruenbacher set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags); 5133b411b363SPhilipp Reisner } else { 5134bde89a9eSAndreas Gruenbacher set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags); 51351ec861ebSAndreas Gruenbacher 
drbd_err(connection, "Requested state change failed by peer: %s (%d)\n", 5136fc3b10a4SPhilipp Reisner drbd_set_st_err_str(retcode), retcode); 5137fc3b10a4SPhilipp Reisner } 5138bde89a9eSAndreas Gruenbacher wake_up(&connection->ping_wait); 5139e4f78edeSPhilipp Reisner 51402735a594SAndreas Gruenbacher return 0; 5141fc3b10a4SPhilipp Reisner } 5142e4f78edeSPhilipp Reisner 5143bde89a9eSAndreas Gruenbacher static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi) 5144e4f78edeSPhilipp Reisner { 51459f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5146b30ab791SAndreas Gruenbacher struct drbd_device *device; 5147e658983aSAndreas Gruenbacher struct p_req_state_reply *p = pi->data; 5148e4f78edeSPhilipp Reisner int retcode = be32_to_cpu(p->retcode); 5149e4f78edeSPhilipp Reisner 51509f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 51519f4fe9adSAndreas Gruenbacher if (!peer_device) 51522735a594SAndreas Gruenbacher return -EIO; 51539f4fe9adSAndreas Gruenbacher device = peer_device->device; 51541952e916SAndreas Gruenbacher 5155bde89a9eSAndreas Gruenbacher if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) { 51560b0ba1efSAndreas Gruenbacher D_ASSERT(device, connection->agreed_pro_version < 100); 5157bde89a9eSAndreas Gruenbacher return got_conn_RqSReply(connection, pi); 51584d0fc3fdSPhilipp Reisner } 51594d0fc3fdSPhilipp Reisner 5160e4f78edeSPhilipp Reisner if (retcode >= SS_SUCCESS) { 5161b30ab791SAndreas Gruenbacher set_bit(CL_ST_CHG_SUCCESS, &device->flags); 5162e4f78edeSPhilipp Reisner } else { 5163b30ab791SAndreas Gruenbacher set_bit(CL_ST_CHG_FAIL, &device->flags); 5164d0180171SAndreas Gruenbacher drbd_err(device, "Requested state change failed by peer: %s (%d)\n", 5165b411b363SPhilipp Reisner drbd_set_st_err_str(retcode), retcode); 5166b411b363SPhilipp Reisner } 5167b30ab791SAndreas Gruenbacher wake_up(&device->state_wait); 5168b411b363SPhilipp Reisner 51692735a594SAndreas Gruenbacher return 0; 
5170b411b363SPhilipp Reisner } 5171b411b363SPhilipp Reisner 5172bde89a9eSAndreas Gruenbacher static int got_Ping(struct drbd_connection *connection, struct packet_info *pi) 5173b411b363SPhilipp Reisner { 5174bde89a9eSAndreas Gruenbacher return drbd_send_ping_ack(connection); 5175b411b363SPhilipp Reisner 5176b411b363SPhilipp Reisner } 5177b411b363SPhilipp Reisner 5178bde89a9eSAndreas Gruenbacher static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi) 5179b411b363SPhilipp Reisner { 5180b411b363SPhilipp Reisner /* restore idle timeout */ 5181bde89a9eSAndreas Gruenbacher connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ; 5182bde89a9eSAndreas Gruenbacher if (!test_and_set_bit(GOT_PING_ACK, &connection->flags)) 5183bde89a9eSAndreas Gruenbacher wake_up(&connection->ping_wait); 5184b411b363SPhilipp Reisner 51852735a594SAndreas Gruenbacher return 0; 5186b411b363SPhilipp Reisner } 5187b411b363SPhilipp Reisner 5188bde89a9eSAndreas Gruenbacher static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi) 5189b411b363SPhilipp Reisner { 51909f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5191b30ab791SAndreas Gruenbacher struct drbd_device *device; 5192e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5193b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5194b411b363SPhilipp Reisner int blksize = be32_to_cpu(p->blksize); 5195b411b363SPhilipp Reisner 51969f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 51979f4fe9adSAndreas Gruenbacher if (!peer_device) 51982735a594SAndreas Gruenbacher return -EIO; 51999f4fe9adSAndreas Gruenbacher device = peer_device->device; 52001952e916SAndreas Gruenbacher 52019f4fe9adSAndreas Gruenbacher D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 5202b411b363SPhilipp Reisner 520369a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 
5204b411b363SPhilipp Reisner 5205b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 5206b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5207b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, sector, blksize); 5208b411b363SPhilipp Reisner /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 5209b30ab791SAndreas Gruenbacher device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 5210b30ab791SAndreas Gruenbacher put_ldev(device); 52111d53f09eSLars Ellenberg } 5212b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5213b30ab791SAndreas Gruenbacher atomic_add(blksize >> 9, &device->rs_sect_in); 5214b411b363SPhilipp Reisner 52152735a594SAndreas Gruenbacher return 0; 5216b411b363SPhilipp Reisner } 5217b411b363SPhilipp Reisner 5218bc9c5c41SAndreas Gruenbacher static int 5219b30ab791SAndreas Gruenbacher validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, 5220bc9c5c41SAndreas Gruenbacher struct rb_root *root, const char *func, 5221bc9c5c41SAndreas Gruenbacher enum drbd_req_event what, bool missing_ok) 5222b411b363SPhilipp Reisner { 5223b411b363SPhilipp Reisner struct drbd_request *req; 5224b411b363SPhilipp Reisner struct bio_and_error m; 5225b411b363SPhilipp Reisner 52260500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 5227b30ab791SAndreas Gruenbacher req = find_request(device, root, id, sector, missing_ok, func); 5228b411b363SPhilipp Reisner if (unlikely(!req)) { 52290500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 523085997675SAndreas Gruenbacher return -EIO; 5231b411b363SPhilipp Reisner } 5232b411b363SPhilipp Reisner __req_mod(req, what, &m); 52330500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 5234b411b363SPhilipp Reisner 5235b411b363SPhilipp Reisner if (m.bio) 5236b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 523785997675SAndreas Gruenbacher return 0; 5238b411b363SPhilipp Reisner } 5239b411b363SPhilipp Reisner 
5240bde89a9eSAndreas Gruenbacher static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi) 5241b411b363SPhilipp Reisner { 52429f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5243b30ab791SAndreas Gruenbacher struct drbd_device *device; 5244e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5245b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5246b411b363SPhilipp Reisner int blksize = be32_to_cpu(p->blksize); 5247b411b363SPhilipp Reisner enum drbd_req_event what; 5248b411b363SPhilipp Reisner 52499f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 52509f4fe9adSAndreas Gruenbacher if (!peer_device) 52512735a594SAndreas Gruenbacher return -EIO; 52529f4fe9adSAndreas Gruenbacher device = peer_device->device; 52531952e916SAndreas Gruenbacher 525469a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5255b411b363SPhilipp Reisner 5256579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) { 5257b30ab791SAndreas Gruenbacher drbd_set_in_sync(device, sector, blksize); 5258b30ab791SAndreas Gruenbacher dec_rs_pending(device); 52592735a594SAndreas Gruenbacher return 0; 5260b411b363SPhilipp Reisner } 5261e05e1e59SAndreas Gruenbacher switch (pi->cmd) { 5262b411b363SPhilipp Reisner case P_RS_WRITE_ACK: 52638554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER_AND_SIS; 5264b411b363SPhilipp Reisner break; 5265b411b363SPhilipp Reisner case P_WRITE_ACK: 52668554df1cSAndreas Gruenbacher what = WRITE_ACKED_BY_PEER; 5267b411b363SPhilipp Reisner break; 5268b411b363SPhilipp Reisner case P_RECV_ACK: 52698554df1cSAndreas Gruenbacher what = RECV_ACKED_BY_PEER; 5270b411b363SPhilipp Reisner break; 5271d4dabbe2SLars Ellenberg case P_SUPERSEDED: 5272d4dabbe2SLars Ellenberg what = CONFLICT_RESOLVED; 52737be8da07SAndreas Gruenbacher break; 52747be8da07SAndreas Gruenbacher case P_RETRY_WRITE: 52757be8da07SAndreas Gruenbacher what = POSTPONE_WRITE; 
5276b411b363SPhilipp Reisner break; 5277b411b363SPhilipp Reisner default: 52782735a594SAndreas Gruenbacher BUG(); 5279b411b363SPhilipp Reisner } 5280b411b363SPhilipp Reisner 5281b30ab791SAndreas Gruenbacher return validate_req_change_req_state(device, p->block_id, sector, 5282b30ab791SAndreas Gruenbacher &device->write_requests, __func__, 5283bc9c5c41SAndreas Gruenbacher what, false); 5284b411b363SPhilipp Reisner } 5285b411b363SPhilipp Reisner 5286bde89a9eSAndreas Gruenbacher static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi) 5287b411b363SPhilipp Reisner { 52889f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5289b30ab791SAndreas Gruenbacher struct drbd_device *device; 5290e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5291b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 52922deb8336SPhilipp Reisner int size = be32_to_cpu(p->blksize); 529385997675SAndreas Gruenbacher int err; 5294b411b363SPhilipp Reisner 52959f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 52969f4fe9adSAndreas Gruenbacher if (!peer_device) 52972735a594SAndreas Gruenbacher return -EIO; 52989f4fe9adSAndreas Gruenbacher device = peer_device->device; 5299b411b363SPhilipp Reisner 530069a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5301b411b363SPhilipp Reisner 5302579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) { 5303b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5304b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 53052735a594SAndreas Gruenbacher return 0; 5306b411b363SPhilipp Reisner } 53072deb8336SPhilipp Reisner 5308b30ab791SAndreas Gruenbacher err = validate_req_change_req_state(device, p->block_id, sector, 5309b30ab791SAndreas Gruenbacher &device->write_requests, __func__, 5310303d1448SPhilipp Reisner NEG_ACKED, true); 531185997675SAndreas Gruenbacher if (err) { 53122deb8336SPhilipp Reisner /* Protocol A has no 
P_WRITE_ACKs, but has P_NEG_ACKs. 53132deb8336SPhilipp Reisner The master bio might already be completed, therefore the 5314c3afd8f5SAndreas Gruenbacher request is no longer in the collision hash. */ 53152deb8336SPhilipp Reisner /* In Protocol B we might already have got a P_RECV_ACK 53162deb8336SPhilipp Reisner but then get a P_NEG_ACK afterwards. */ 5317b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 53182deb8336SPhilipp Reisner } 53192735a594SAndreas Gruenbacher return 0; 5320b411b363SPhilipp Reisner } 5321b411b363SPhilipp Reisner 5322bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) 5323b411b363SPhilipp Reisner { 53249f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5325b30ab791SAndreas Gruenbacher struct drbd_device *device; 5326e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5327b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5328b411b363SPhilipp Reisner 53299f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 53309f4fe9adSAndreas Gruenbacher if (!peer_device) 53312735a594SAndreas Gruenbacher return -EIO; 53329f4fe9adSAndreas Gruenbacher device = peer_device->device; 53331952e916SAndreas Gruenbacher 533469a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 53357be8da07SAndreas Gruenbacher 5336d0180171SAndreas Gruenbacher drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", 5337b411b363SPhilipp Reisner (unsigned long long)sector, be32_to_cpu(p->blksize)); 5338b411b363SPhilipp Reisner 5339b30ab791SAndreas Gruenbacher return validate_req_change_req_state(device, p->block_id, sector, 5340b30ab791SAndreas Gruenbacher &device->read_requests, __func__, 53418554df1cSAndreas Gruenbacher NEG_ACKED, false); 5342b411b363SPhilipp Reisner } 5343b411b363SPhilipp Reisner 5344bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct 
packet_info *pi) 5345b411b363SPhilipp Reisner { 53469f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5347b30ab791SAndreas Gruenbacher struct drbd_device *device; 5348b411b363SPhilipp Reisner sector_t sector; 5349b411b363SPhilipp Reisner int size; 5350e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 53511952e916SAndreas Gruenbacher 53529f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 53539f4fe9adSAndreas Gruenbacher if (!peer_device) 53542735a594SAndreas Gruenbacher return -EIO; 53559f4fe9adSAndreas Gruenbacher device = peer_device->device; 5356b411b363SPhilipp Reisner 5357b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5358b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 5359b411b363SPhilipp Reisner 536069a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5361b411b363SPhilipp Reisner 5362b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5363b411b363SPhilipp Reisner 5364b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_FAILED)) { 5365b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5366e05e1e59SAndreas Gruenbacher switch (pi->cmd) { 5367d612d309SPhilipp Reisner case P_NEG_RS_DREPLY: 5368b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 5369d612d309SPhilipp Reisner case P_RS_CANCEL: 5370d612d309SPhilipp Reisner break; 5371d612d309SPhilipp Reisner default: 53722735a594SAndreas Gruenbacher BUG(); 5373d612d309SPhilipp Reisner } 5374b30ab791SAndreas Gruenbacher put_ldev(device); 5375b411b363SPhilipp Reisner } 5376b411b363SPhilipp Reisner 53772735a594SAndreas Gruenbacher return 0; 5378b411b363SPhilipp Reisner } 5379b411b363SPhilipp Reisner 5380bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) 5381b411b363SPhilipp Reisner { 5382e658983aSAndreas Gruenbacher struct p_barrier_ack *p = pi->data; 5383c06ece6bSAndreas Gruenbacher struct drbd_peer_device 
*peer_device; 53849ed57dcbSLars Ellenberg int vnr; 5385b411b363SPhilipp Reisner 5386bde89a9eSAndreas Gruenbacher tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); 5387b411b363SPhilipp Reisner 53889ed57dcbSLars Ellenberg rcu_read_lock(); 5389c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 5390c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 5391c06ece6bSAndreas Gruenbacher 5392b30ab791SAndreas Gruenbacher if (device->state.conn == C_AHEAD && 5393b30ab791SAndreas Gruenbacher atomic_read(&device->ap_in_flight) == 0 && 5394b30ab791SAndreas Gruenbacher !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { 5395b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ; 5396b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 5397c4752ef1SPhilipp Reisner } 53989ed57dcbSLars Ellenberg } 53999ed57dcbSLars Ellenberg rcu_read_unlock(); 5400c4752ef1SPhilipp Reisner 54012735a594SAndreas Gruenbacher return 0; 5402b411b363SPhilipp Reisner } 5403b411b363SPhilipp Reisner 5404bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) 5405b411b363SPhilipp Reisner { 54069f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5407b30ab791SAndreas Gruenbacher struct drbd_device *device; 5408e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 540984b8c06bSAndreas Gruenbacher struct drbd_device_work *dw; 5410b411b363SPhilipp Reisner sector_t sector; 5411b411b363SPhilipp Reisner int size; 5412b411b363SPhilipp Reisner 54139f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 54149f4fe9adSAndreas Gruenbacher if (!peer_device) 54152735a594SAndreas Gruenbacher return -EIO; 54169f4fe9adSAndreas Gruenbacher device = peer_device->device; 54171952e916SAndreas Gruenbacher 5418b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5419b411b363SPhilipp Reisner size = 
be32_to_cpu(p->blksize); 5420b411b363SPhilipp Reisner 542169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5422b411b363SPhilipp Reisner 5423b411b363SPhilipp Reisner if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) 5424b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 5425b411b363SPhilipp Reisner else 5426b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5427b411b363SPhilipp Reisner 5428b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 54292735a594SAndreas Gruenbacher return 0; 54301d53f09eSLars Ellenberg 5431b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5432b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5433b411b363SPhilipp Reisner 5434b30ab791SAndreas Gruenbacher --device->ov_left; 5435ea5442afSLars Ellenberg 5436ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 5437b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 5438b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 5439ea5442afSLars Ellenberg 5440b30ab791SAndreas Gruenbacher if (device->ov_left == 0) { 544184b8c06bSAndreas Gruenbacher dw = kmalloc(sizeof(*dw), GFP_NOIO); 544284b8c06bSAndreas Gruenbacher if (dw) { 544384b8c06bSAndreas Gruenbacher dw->w.cb = w_ov_finished; 544484b8c06bSAndreas Gruenbacher dw->device = device; 544584b8c06bSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &dw->w); 5446b411b363SPhilipp Reisner } else { 544784b8c06bSAndreas Gruenbacher drbd_err(device, "kmalloc(dw) failed."); 5448b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5449b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 5450b411b363SPhilipp Reisner } 5451b411b363SPhilipp Reisner } 5452b30ab791SAndreas Gruenbacher put_ldev(device); 54532735a594SAndreas Gruenbacher return 0; 5454b411b363SPhilipp Reisner } 5455b411b363SPhilipp Reisner 5456bde89a9eSAndreas Gruenbacher static int got_skip(struct 
drbd_connection *connection, struct packet_info *pi) 54570ced55a3SPhilipp Reisner { 54582735a594SAndreas Gruenbacher return 0; 54590ced55a3SPhilipp Reisner } 54600ced55a3SPhilipp Reisner 5461668700b4SPhilipp Reisner struct meta_sock_cmd { 5462b411b363SPhilipp Reisner size_t pkt_size; 5463bde89a9eSAndreas Gruenbacher int (*fn)(struct drbd_connection *connection, struct packet_info *); 5464b411b363SPhilipp Reisner }; 5465b411b363SPhilipp Reisner 5466668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout) 5467668700b4SPhilipp Reisner { 5468668700b4SPhilipp Reisner long t; 5469668700b4SPhilipp Reisner struct net_conf *nc; 5470668700b4SPhilipp Reisner 5471668700b4SPhilipp Reisner rcu_read_lock(); 5472668700b4SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 5473668700b4SPhilipp Reisner t = ping_timeout ? nc->ping_timeo : nc->ping_int; 5474668700b4SPhilipp Reisner rcu_read_unlock(); 5475668700b4SPhilipp Reisner 5476668700b4SPhilipp Reisner t *= HZ; 5477668700b4SPhilipp Reisner if (ping_timeout) 5478668700b4SPhilipp Reisner t /= 10; 5479668700b4SPhilipp Reisner 5480668700b4SPhilipp Reisner connection->meta.socket->sk->sk_rcvtimeo = t; 5481668700b4SPhilipp Reisner } 5482668700b4SPhilipp Reisner 5483668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection) 5484668700b4SPhilipp Reisner { 5485668700b4SPhilipp Reisner set_rcvtimeo(connection, 1); 5486668700b4SPhilipp Reisner } 5487668700b4SPhilipp Reisner 5488668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection) 5489668700b4SPhilipp Reisner { 5490668700b4SPhilipp Reisner set_rcvtimeo(connection, 0); 5491668700b4SPhilipp Reisner } 5492668700b4SPhilipp Reisner 5493668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = { 5494e658983aSAndreas Gruenbacher [P_PING] = { 0, got_Ping }, 5495e658983aSAndreas Gruenbacher [P_PING_ACK] = { 0, got_PingAck }, 5496b411b363SPhilipp Reisner 
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5497b411b363SPhilipp Reisner [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5498b411b363SPhilipp Reisner [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5499d4dabbe2SLars Ellenberg [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck }, 5500b411b363SPhilipp Reisner [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, 5501b411b363SPhilipp Reisner [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 5502b411b363SPhilipp Reisner [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, 5503b411b363SPhilipp Reisner [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 5504b411b363SPhilipp Reisner [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 5505b411b363SPhilipp Reisner [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 5506b411b363SPhilipp Reisner [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 550702918be2SPhilipp Reisner [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 5508d612d309SPhilipp Reisner [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, 55091952e916SAndreas Gruenbacher [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, 55101952e916SAndreas Gruenbacher [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, 5511b411b363SPhilipp Reisner }; 5512b411b363SPhilipp Reisner 55131c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi) 5514b411b363SPhilipp Reisner { 5515bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 5516668700b4SPhilipp Reisner struct meta_sock_cmd *cmd = NULL; 551777351055SPhilipp Reisner struct packet_info pi; 5518668700b4SPhilipp Reisner unsigned long pre_recv_jif; 5519257d0af6SPhilipp Reisner int rv; 5520bde89a9eSAndreas Gruenbacher void *buf = connection->meta.rbuf; 5521b411b363SPhilipp Reisner int received = 0; 5522bde89a9eSAndreas 
Gruenbacher unsigned int header_size = drbd_header_size(connection); 552352b061a4SAndreas Gruenbacher int expect = header_size; 552444ed167dSPhilipp Reisner bool ping_timeout_active = false; 55253990e04dSPhilipp Reisner struct sched_param param = { .sched_priority = 2 }; 5526b411b363SPhilipp Reisner 55273990e04dSPhilipp Reisner rv = sched_setscheduler(current, SCHED_RR, ¶m); 55283990e04dSPhilipp Reisner if (rv < 0) 5529668700b4SPhilipp Reisner drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv); 5530b411b363SPhilipp Reisner 5531e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 553280822284SPhilipp Reisner drbd_thread_current_set_cpu(thi); 553344ed167dSPhilipp Reisner 5534668700b4SPhilipp Reisner conn_reclaim_net_peer_reqs(connection); 553544ed167dSPhilipp Reisner 5536bde89a9eSAndreas Gruenbacher if (test_and_clear_bit(SEND_PING, &connection->flags)) { 5537bde89a9eSAndreas Gruenbacher if (drbd_send_ping(connection)) { 55381ec861ebSAndreas Gruenbacher drbd_err(connection, "drbd_send_ping has failed\n"); 5539841ce241SAndreas Gruenbacher goto reconnect; 5540841ce241SAndreas Gruenbacher } 5541668700b4SPhilipp Reisner set_ping_timeout(connection); 554244ed167dSPhilipp Reisner ping_timeout_active = true; 5543b411b363SPhilipp Reisner } 5544b411b363SPhilipp Reisner 5545668700b4SPhilipp Reisner pre_recv_jif = jiffies; 5546bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); 5547b411b363SPhilipp Reisner 5548b411b363SPhilipp Reisner /* Note: 5549b411b363SPhilipp Reisner * -EINTR (on meta) we got a signal 5550b411b363SPhilipp Reisner * -EAGAIN (on meta) rcvtimeo expired 5551b411b363SPhilipp Reisner * -ECONNRESET other side closed the connection 5552b411b363SPhilipp Reisner * -ERESTARTSYS (on data) we got a signal 5553b411b363SPhilipp Reisner * rv < 0 other than above: unexpected error! 
5554b411b363SPhilipp Reisner * rv == expected: full header or command 5555b411b363SPhilipp Reisner * rv < expected: "woken" by signal during receive 5556b411b363SPhilipp Reisner * rv == 0 : "connection shut down by peer" 5557b411b363SPhilipp Reisner */ 5558b411b363SPhilipp Reisner if (likely(rv > 0)) { 5559b411b363SPhilipp Reisner received += rv; 5560b411b363SPhilipp Reisner buf += rv; 5561b411b363SPhilipp Reisner } else if (rv == 0) { 5562bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 5563b66623e3SPhilipp Reisner long t; 5564b66623e3SPhilipp Reisner rcu_read_lock(); 5565bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 5566b66623e3SPhilipp Reisner rcu_read_unlock(); 5567b66623e3SPhilipp Reisner 5568bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, 5569bde89a9eSAndreas Gruenbacher connection->cstate < C_WF_REPORT_PARAMS, 5570b66623e3SPhilipp Reisner t); 5571599377acSPhilipp Reisner if (t) 5572599377acSPhilipp Reisner break; 5573599377acSPhilipp Reisner } 55741ec861ebSAndreas Gruenbacher drbd_err(connection, "meta connection shut down by peer.\n"); 5575b411b363SPhilipp Reisner goto reconnect; 5576b411b363SPhilipp Reisner } else if (rv == -EAGAIN) { 5577cb6518cbSLars Ellenberg /* If the data socket received something meanwhile, 5578cb6518cbSLars Ellenberg * that is good enough: peer is still alive. 
*/ 5579668700b4SPhilipp Reisner if (time_after(connection->last_received, pre_recv_jif)) 5580cb6518cbSLars Ellenberg continue; 5581f36af18cSLars Ellenberg if (ping_timeout_active) { 55821ec861ebSAndreas Gruenbacher drbd_err(connection, "PingAck did not arrive in time.\n"); 5583b411b363SPhilipp Reisner goto reconnect; 5584b411b363SPhilipp Reisner } 5585bde89a9eSAndreas Gruenbacher set_bit(SEND_PING, &connection->flags); 5586b411b363SPhilipp Reisner continue; 5587b411b363SPhilipp Reisner } else if (rv == -EINTR) { 5588668700b4SPhilipp Reisner /* maybe drbd_thread_stop(): the while condition will notice. 5589668700b4SPhilipp Reisner * maybe woken for send_ping: we'll send a ping above, 5590668700b4SPhilipp Reisner * and change the rcvtimeo */ 5591668700b4SPhilipp Reisner flush_signals(current); 5592b411b363SPhilipp Reisner continue; 5593b411b363SPhilipp Reisner } else { 55941ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 5595b411b363SPhilipp Reisner goto reconnect; 5596b411b363SPhilipp Reisner } 5597b411b363SPhilipp Reisner 5598b411b363SPhilipp Reisner if (received == expect && cmd == NULL) { 5599bde89a9eSAndreas Gruenbacher if (decode_header(connection, connection->meta.rbuf, &pi)) 5600b411b363SPhilipp Reisner goto reconnect; 5601668700b4SPhilipp Reisner cmd = &ack_receiver_tbl[pi.cmd]; 5602668700b4SPhilipp Reisner if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) { 56031ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", 56042fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd); 5605b411b363SPhilipp Reisner goto disconnect; 5606b411b363SPhilipp Reisner } 5607e658983aSAndreas Gruenbacher expect = header_size + cmd->pkt_size; 560852b061a4SAndreas Gruenbacher if (pi.size != expect - header_size) { 56091ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", 561077351055SPhilipp Reisner pi.cmd, pi.size); 5611b411b363SPhilipp Reisner goto reconnect; 
5612b411b363SPhilipp Reisner } 5613257d0af6SPhilipp Reisner } 5614b411b363SPhilipp Reisner if (received == expect) { 56152735a594SAndreas Gruenbacher bool err; 5616a4fbda8eSPhilipp Reisner 5617bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi); 56182735a594SAndreas Gruenbacher if (err) { 56191ec861ebSAndreas Gruenbacher drbd_err(connection, "%pf failed\n", cmd->fn); 5620b411b363SPhilipp Reisner goto reconnect; 56211952e916SAndreas Gruenbacher } 5622b411b363SPhilipp Reisner 5623bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 5624f36af18cSLars Ellenberg 5625668700b4SPhilipp Reisner if (cmd == &ack_receiver_tbl[P_PING_ACK]) { 5626668700b4SPhilipp Reisner set_idle_timeout(connection); 562744ed167dSPhilipp Reisner ping_timeout_active = false; 562844ed167dSPhilipp Reisner } 5629b411b363SPhilipp Reisner 5630bde89a9eSAndreas Gruenbacher buf = connection->meta.rbuf; 5631b411b363SPhilipp Reisner received = 0; 563252b061a4SAndreas Gruenbacher expect = header_size; 5633b411b363SPhilipp Reisner cmd = NULL; 5634b411b363SPhilipp Reisner } 5635b411b363SPhilipp Reisner } 5636b411b363SPhilipp Reisner 5637b411b363SPhilipp Reisner if (0) { 5638b411b363SPhilipp Reisner reconnect: 5639bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 5640bde89a9eSAndreas Gruenbacher conn_md_sync(connection); 5641b411b363SPhilipp Reisner } 5642b411b363SPhilipp Reisner if (0) { 5643b411b363SPhilipp Reisner disconnect: 5644bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 5645b411b363SPhilipp Reisner } 5646b411b363SPhilipp Reisner 5647668700b4SPhilipp Reisner drbd_info(connection, "ack_receiver terminated\n"); 5648b411b363SPhilipp Reisner 5649b411b363SPhilipp Reisner return 0; 5650b411b363SPhilipp Reisner } 5651668700b4SPhilipp Reisner 5652668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws) 5653668700b4SPhilipp Reisner { 5654668700b4SPhilipp Reisner struct 
drbd_peer_device *peer_device = 5655668700b4SPhilipp Reisner container_of(ws, struct drbd_peer_device, send_acks_work); 5656668700b4SPhilipp Reisner struct drbd_connection *connection = peer_device->connection; 5657668700b4SPhilipp Reisner struct drbd_device *device = peer_device->device; 5658668700b4SPhilipp Reisner struct net_conf *nc; 5659668700b4SPhilipp Reisner int tcp_cork, err; 5660668700b4SPhilipp Reisner 5661668700b4SPhilipp Reisner rcu_read_lock(); 5662668700b4SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 5663668700b4SPhilipp Reisner tcp_cork = nc->tcp_cork; 5664668700b4SPhilipp Reisner rcu_read_unlock(); 5665668700b4SPhilipp Reisner 5666668700b4SPhilipp Reisner if (tcp_cork) 5667668700b4SPhilipp Reisner drbd_tcp_cork(connection->meta.socket); 5668668700b4SPhilipp Reisner 5669668700b4SPhilipp Reisner err = drbd_finish_peer_reqs(device); 5670668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 5671668700b4SPhilipp Reisner /* get is in drbd_endio_write_sec_final(). That is necessary to keep the 5672668700b4SPhilipp Reisner struct work_struct send_acks_work alive, which is in the peer_device object */ 5673668700b4SPhilipp Reisner 5674668700b4SPhilipp Reisner if (err) { 5675668700b4SPhilipp Reisner conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 5676668700b4SPhilipp Reisner return; 5677668700b4SPhilipp Reisner } 5678668700b4SPhilipp Reisner 5679668700b4SPhilipp Reisner if (tcp_cork) 5680668700b4SPhilipp Reisner drbd_tcp_uncork(connection->meta.socket); 5681668700b4SPhilipp Reisner 5682668700b4SPhilipp Reisner return; 5683668700b4SPhilipp Reisner } 5684