1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2b411b363SPhilipp Reisner /* 3b411b363SPhilipp Reisner drbd_receiver.c 4b411b363SPhilipp Reisner 5b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6b411b363SPhilipp Reisner 7b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 8b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10b411b363SPhilipp Reisner 11b411b363SPhilipp Reisner */ 12b411b363SPhilipp Reisner 13b411b363SPhilipp Reisner 14b411b363SPhilipp Reisner #include <linux/module.h> 15b411b363SPhilipp Reisner 167e5fec31SFabian Frederick #include <linux/uaccess.h> 17b411b363SPhilipp Reisner #include <net/sock.h> 18b411b363SPhilipp Reisner 19b411b363SPhilipp Reisner #include <linux/drbd.h> 20b411b363SPhilipp Reisner #include <linux/fs.h> 21b411b363SPhilipp Reisner #include <linux/file.h> 22b411b363SPhilipp Reisner #include <linux/in.h> 23b411b363SPhilipp Reisner #include <linux/mm.h> 24b411b363SPhilipp Reisner #include <linux/memcontrol.h> 25b411b363SPhilipp Reisner #include <linux/mm_inline.h> 26b411b363SPhilipp Reisner #include <linux/slab.h> 27ae7e81c0SIngo Molnar #include <uapi/linux/sched/types.h> 28174cd4b1SIngo Molnar #include <linux/sched/signal.h> 29b411b363SPhilipp Reisner #include <linux/pkt_sched.h> 30b411b363SPhilipp Reisner #define __KERNEL_SYSCALLS__ 31b411b363SPhilipp Reisner #include <linux/unistd.h> 32b411b363SPhilipp Reisner #include <linux/vmalloc.h> 33b411b363SPhilipp Reisner #include <linux/random.h> 34b411b363SPhilipp Reisner #include <linux/string.h> 35b411b363SPhilipp Reisner #include <linux/scatterlist.h> 36b411b363SPhilipp Reisner #include "drbd_int.h" 37a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h" 38b411b363SPhilipp Reisner #include "drbd_req.h" 39b411b363SPhilipp Reisner #include 
"drbd_vli.h" 40b411b363SPhilipp Reisner 41f31e583aSLars Ellenberg #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES) 4220c68fdeSLars Ellenberg 4377351055SPhilipp Reisner struct packet_info { 4477351055SPhilipp Reisner enum drbd_packet cmd; 45e2857216SAndreas Gruenbacher unsigned int size; 46e2857216SAndreas Gruenbacher unsigned int vnr; 47e658983aSAndreas Gruenbacher void *data; 4877351055SPhilipp Reisner }; 4977351055SPhilipp Reisner 50b411b363SPhilipp Reisner enum finish_epoch { 51b411b363SPhilipp Reisner FE_STILL_LIVE, 52b411b363SPhilipp Reisner FE_DESTROYED, 53b411b363SPhilipp Reisner FE_RECYCLED, 54b411b363SPhilipp Reisner }; 55b411b363SPhilipp Reisner 56bde89a9eSAndreas Gruenbacher static int drbd_do_features(struct drbd_connection *connection); 57bde89a9eSAndreas Gruenbacher static int drbd_do_auth(struct drbd_connection *connection); 5869a22773SAndreas Gruenbacher static int drbd_disconnected(struct drbd_peer_device *); 59a0fb3c47SLars Ellenberg static void conn_wait_active_ee_empty(struct drbd_connection *connection); 60bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); 6199920dc5SAndreas Gruenbacher static int e_end_block(struct drbd_work *, int); 62b411b363SPhilipp Reisner 63b411b363SPhilipp Reisner 64b411b363SPhilipp Reisner #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) 65b411b363SPhilipp Reisner 6645bb912bSLars Ellenberg /* 6745bb912bSLars Ellenberg * some helper functions to deal with single linked page lists, 6845bb912bSLars Ellenberg * page->private being our "next" pointer. 6945bb912bSLars Ellenberg */ 7045bb912bSLars Ellenberg 7145bb912bSLars Ellenberg /* If at least n pages are linked at head, get n pages off. 7245bb912bSLars Ellenberg * Otherwise, don't modify head, and return NULL. 7345bb912bSLars Ellenberg * Locking is the responsibility of the caller. 
7445bb912bSLars Ellenberg */ 7545bb912bSLars Ellenberg static struct page *page_chain_del(struct page **head, int n) 7645bb912bSLars Ellenberg { 7745bb912bSLars Ellenberg struct page *page; 7845bb912bSLars Ellenberg struct page *tmp; 7945bb912bSLars Ellenberg 8045bb912bSLars Ellenberg BUG_ON(!n); 8145bb912bSLars Ellenberg BUG_ON(!head); 8245bb912bSLars Ellenberg 8345bb912bSLars Ellenberg page = *head; 8423ce4227SPhilipp Reisner 8523ce4227SPhilipp Reisner if (!page) 8623ce4227SPhilipp Reisner return NULL; 8723ce4227SPhilipp Reisner 8845bb912bSLars Ellenberg while (page) { 8945bb912bSLars Ellenberg tmp = page_chain_next(page); 9045bb912bSLars Ellenberg if (--n == 0) 9145bb912bSLars Ellenberg break; /* found sufficient pages */ 9245bb912bSLars Ellenberg if (tmp == NULL) 9345bb912bSLars Ellenberg /* insufficient pages, don't use any of them. */ 9445bb912bSLars Ellenberg return NULL; 9545bb912bSLars Ellenberg page = tmp; 9645bb912bSLars Ellenberg } 9745bb912bSLars Ellenberg 9845bb912bSLars Ellenberg /* add end of list marker for the returned list */ 9945bb912bSLars Ellenberg set_page_private(page, 0); 10045bb912bSLars Ellenberg /* actual return value, and adjustment of head */ 10145bb912bSLars Ellenberg page = *head; 10245bb912bSLars Ellenberg *head = tmp; 10345bb912bSLars Ellenberg return page; 10445bb912bSLars Ellenberg } 10545bb912bSLars Ellenberg 10645bb912bSLars Ellenberg /* may be used outside of locks to find the tail of a (usually short) 10745bb912bSLars Ellenberg * "private" page chain, before adding it back to a global chain head 10845bb912bSLars Ellenberg * with page_chain_add() under a spinlock. 
*/ 10945bb912bSLars Ellenberg static struct page *page_chain_tail(struct page *page, int *len) 11045bb912bSLars Ellenberg { 11145bb912bSLars Ellenberg struct page *tmp; 11245bb912bSLars Ellenberg int i = 1; 11345bb912bSLars Ellenberg while ((tmp = page_chain_next(page))) 11445bb912bSLars Ellenberg ++i, page = tmp; 11545bb912bSLars Ellenberg if (len) 11645bb912bSLars Ellenberg *len = i; 11745bb912bSLars Ellenberg return page; 11845bb912bSLars Ellenberg } 11945bb912bSLars Ellenberg 12045bb912bSLars Ellenberg static int page_chain_free(struct page *page) 12145bb912bSLars Ellenberg { 12245bb912bSLars Ellenberg struct page *tmp; 12345bb912bSLars Ellenberg int i = 0; 12445bb912bSLars Ellenberg page_chain_for_each_safe(page, tmp) { 12545bb912bSLars Ellenberg put_page(page); 12645bb912bSLars Ellenberg ++i; 12745bb912bSLars Ellenberg } 12845bb912bSLars Ellenberg return i; 12945bb912bSLars Ellenberg } 13045bb912bSLars Ellenberg 13145bb912bSLars Ellenberg static void page_chain_add(struct page **head, 13245bb912bSLars Ellenberg struct page *chain_first, struct page *chain_last) 13345bb912bSLars Ellenberg { 13445bb912bSLars Ellenberg #if 1 13545bb912bSLars Ellenberg struct page *tmp; 13645bb912bSLars Ellenberg tmp = page_chain_tail(chain_first, NULL); 13745bb912bSLars Ellenberg BUG_ON(tmp != chain_last); 13845bb912bSLars Ellenberg #endif 13945bb912bSLars Ellenberg 14045bb912bSLars Ellenberg /* add chain to head */ 14145bb912bSLars Ellenberg set_page_private(chain_last, (unsigned long)*head); 14245bb912bSLars Ellenberg *head = chain_first; 14345bb912bSLars Ellenberg } 14445bb912bSLars Ellenberg 145b30ab791SAndreas Gruenbacher static struct page *__drbd_alloc_pages(struct drbd_device *device, 14618c2d522SAndreas Gruenbacher unsigned int number) 147b411b363SPhilipp Reisner { 148b411b363SPhilipp Reisner struct page *page = NULL; 14945bb912bSLars Ellenberg struct page *tmp = NULL; 15018c2d522SAndreas Gruenbacher unsigned int i = 0; 151b411b363SPhilipp Reisner 152b411b363SPhilipp 
Reisner /* Yes, testing drbd_pp_vacant outside the lock is racy. 153b411b363SPhilipp Reisner * So what. It saves a spin_lock. */ 15445bb912bSLars Ellenberg if (drbd_pp_vacant >= number) { 155b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 15645bb912bSLars Ellenberg page = page_chain_del(&drbd_pp_pool, number); 15745bb912bSLars Ellenberg if (page) 15845bb912bSLars Ellenberg drbd_pp_vacant -= number; 159b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 16045bb912bSLars Ellenberg if (page) 16145bb912bSLars Ellenberg return page; 162b411b363SPhilipp Reisner } 16345bb912bSLars Ellenberg 164b411b363SPhilipp Reisner /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD 165b411b363SPhilipp Reisner * "criss-cross" setup, that might cause write-out on some other DRBD, 166b411b363SPhilipp Reisner * which in turn might block on the other node at this very place. */ 16745bb912bSLars Ellenberg for (i = 0; i < number; i++) { 16845bb912bSLars Ellenberg tmp = alloc_page(GFP_TRY); 16945bb912bSLars Ellenberg if (!tmp) 17045bb912bSLars Ellenberg break; 17145bb912bSLars Ellenberg set_page_private(tmp, (unsigned long)page); 17245bb912bSLars Ellenberg page = tmp; 17345bb912bSLars Ellenberg } 17445bb912bSLars Ellenberg 17545bb912bSLars Ellenberg if (i == number) 176b411b363SPhilipp Reisner return page; 17745bb912bSLars Ellenberg 17845bb912bSLars Ellenberg /* Not enough pages immediately available this time. 179c37c8ecfSAndreas Gruenbacher * No need to jump around here, drbd_alloc_pages will retry this 18045bb912bSLars Ellenberg * function "soon". 
*/ 18145bb912bSLars Ellenberg if (page) { 18245bb912bSLars Ellenberg tmp = page_chain_tail(page, NULL); 18345bb912bSLars Ellenberg spin_lock(&drbd_pp_lock); 18445bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 18545bb912bSLars Ellenberg drbd_pp_vacant += i; 18645bb912bSLars Ellenberg spin_unlock(&drbd_pp_lock); 18745bb912bSLars Ellenberg } 18845bb912bSLars Ellenberg return NULL; 189b411b363SPhilipp Reisner } 190b411b363SPhilipp Reisner 191b30ab791SAndreas Gruenbacher static void reclaim_finished_net_peer_reqs(struct drbd_device *device, 192a990be46SAndreas Gruenbacher struct list_head *to_be_freed) 193b411b363SPhilipp Reisner { 194a8cd15baSAndreas Gruenbacher struct drbd_peer_request *peer_req, *tmp; 195b411b363SPhilipp Reisner 196b411b363SPhilipp Reisner /* The EEs are always appended to the end of the list. Since 197b411b363SPhilipp Reisner they are sent in order over the wire, they have to finish 198b411b363SPhilipp Reisner in order. As soon as we see the first not finished we can 199b411b363SPhilipp Reisner stop to examine the list... 
*/ 200b411b363SPhilipp Reisner 201a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { 202045417f7SAndreas Gruenbacher if (drbd_peer_req_has_active_page(peer_req)) 203b411b363SPhilipp Reisner break; 204a8cd15baSAndreas Gruenbacher list_move(&peer_req->w.list, to_be_freed); 205b411b363SPhilipp Reisner } 206b411b363SPhilipp Reisner } 207b411b363SPhilipp Reisner 208668700b4SPhilipp Reisner static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) 209b411b363SPhilipp Reisner { 210b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 211db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 212b411b363SPhilipp Reisner 2130500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 214b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 2150500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 216a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 217b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 218b411b363SPhilipp Reisner } 219b411b363SPhilipp Reisner 220668700b4SPhilipp Reisner static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) 221668700b4SPhilipp Reisner { 222668700b4SPhilipp Reisner struct drbd_peer_device *peer_device; 223668700b4SPhilipp Reisner int vnr; 224668700b4SPhilipp Reisner 225668700b4SPhilipp Reisner rcu_read_lock(); 226668700b4SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 227668700b4SPhilipp Reisner struct drbd_device *device = peer_device->device; 228668700b4SPhilipp Reisner if (!atomic_read(&device->pp_in_use_by_net)) 229668700b4SPhilipp Reisner continue; 230668700b4SPhilipp Reisner 231668700b4SPhilipp Reisner kref_get(&device->kref); 232668700b4SPhilipp Reisner rcu_read_unlock(); 233668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 234668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 
235668700b4SPhilipp Reisner rcu_read_lock(); 236668700b4SPhilipp Reisner } 237668700b4SPhilipp Reisner rcu_read_unlock(); 238668700b4SPhilipp Reisner } 239668700b4SPhilipp Reisner 240b411b363SPhilipp Reisner /** 241c37c8ecfSAndreas Gruenbacher * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) 242b30ab791SAndreas Gruenbacher * @device: DRBD device. 24345bb912bSLars Ellenberg * @number: number of pages requested 24445bb912bSLars Ellenberg * @retry: whether to retry, if not enough pages are available right now 245b411b363SPhilipp Reisner * 24645bb912bSLars Ellenberg * Tries to allocate number pages, first from our own page pool, then from 2470e49d7b0SLars Ellenberg * the kernel. 24845bb912bSLars Ellenberg * Possibly retry until DRBD frees sufficient pages somewhere else. 24945bb912bSLars Ellenberg * 2500e49d7b0SLars Ellenberg * If this allocation would exceed the max_buffers setting, we throttle 2510e49d7b0SLars Ellenberg * allocation (schedule_timeout) to give the system some room to breathe. 2520e49d7b0SLars Ellenberg * 2530e49d7b0SLars Ellenberg * We do not use max-buffers as hard limit, because it could lead to 2540e49d7b0SLars Ellenberg * congestion and further to a distributed deadlock during online-verify or 2550e49d7b0SLars Ellenberg * (checksum based) resync, if the max-buffers, socket buffer sizes and 2560e49d7b0SLars Ellenberg * resync-rate settings are mis-configured. 2570e49d7b0SLars Ellenberg * 25845bb912bSLars Ellenberg * Returns a page chain linked via page->private. 
259b411b363SPhilipp Reisner */ 26069a22773SAndreas Gruenbacher struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, 261c37c8ecfSAndreas Gruenbacher bool retry) 262b411b363SPhilipp Reisner { 26369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 264b411b363SPhilipp Reisner struct page *page = NULL; 26544ed167dSPhilipp Reisner struct net_conf *nc; 266b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2670e49d7b0SLars Ellenberg unsigned int mxb; 268b411b363SPhilipp Reisner 26944ed167dSPhilipp Reisner rcu_read_lock(); 27069a22773SAndreas Gruenbacher nc = rcu_dereference(peer_device->connection->net_conf); 27144ed167dSPhilipp Reisner mxb = nc ? nc->max_buffers : 1000000; 27244ed167dSPhilipp Reisner rcu_read_unlock(); 27344ed167dSPhilipp Reisner 274b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) 275b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 276b411b363SPhilipp Reisner 277668700b4SPhilipp Reisner /* Try to keep the fast path fast, but occasionally we need 278668700b4SPhilipp Reisner * to reclaim the pages we lended to the network stack. 
*/ 279668700b4SPhilipp Reisner if (page && atomic_read(&device->pp_in_use_by_net) > 512) 280668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 281668700b4SPhilipp Reisner 28245bb912bSLars Ellenberg while (page == NULL) { 283b411b363SPhilipp Reisner prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); 284b411b363SPhilipp Reisner 285668700b4SPhilipp Reisner drbd_reclaim_net_peer_reqs(device); 286b411b363SPhilipp Reisner 287b30ab791SAndreas Gruenbacher if (atomic_read(&device->pp_in_use) < mxb) { 288b30ab791SAndreas Gruenbacher page = __drbd_alloc_pages(device, number); 289b411b363SPhilipp Reisner if (page) 290b411b363SPhilipp Reisner break; 291b411b363SPhilipp Reisner } 292b411b363SPhilipp Reisner 293b411b363SPhilipp Reisner if (!retry) 294b411b363SPhilipp Reisner break; 295b411b363SPhilipp Reisner 296b411b363SPhilipp Reisner if (signal_pending(current)) { 297d0180171SAndreas Gruenbacher drbd_warn(device, "drbd_alloc_pages interrupted!\n"); 298b411b363SPhilipp Reisner break; 299b411b363SPhilipp Reisner } 300b411b363SPhilipp Reisner 3010e49d7b0SLars Ellenberg if (schedule_timeout(HZ/10) == 0) 3020e49d7b0SLars Ellenberg mxb = UINT_MAX; 303b411b363SPhilipp Reisner } 304b411b363SPhilipp Reisner finish_wait(&drbd_pp_wait, &wait); 305b411b363SPhilipp Reisner 30645bb912bSLars Ellenberg if (page) 307b30ab791SAndreas Gruenbacher atomic_add(number, &device->pp_in_use); 308b411b363SPhilipp Reisner return page; 309b411b363SPhilipp Reisner } 310b411b363SPhilipp Reisner 311c37c8ecfSAndreas Gruenbacher /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. 3120500813fSAndreas Gruenbacher * Is also used from inside an other spin_lock_irq(&resource->req_lock); 31345bb912bSLars Ellenberg * Either links the page chain back to the global pool, 31445bb912bSLars Ellenberg * or returns all pages to the system. 
*/ 315b30ab791SAndreas Gruenbacher static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) 316b411b363SPhilipp Reisner { 317b30ab791SAndreas Gruenbacher atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; 318b411b363SPhilipp Reisner int i; 319435f0740SLars Ellenberg 320a73ff323SLars Ellenberg if (page == NULL) 321a73ff323SLars Ellenberg return; 322a73ff323SLars Ellenberg 323183ece30SRoland Kammerer if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) 32445bb912bSLars Ellenberg i = page_chain_free(page); 32545bb912bSLars Ellenberg else { 32645bb912bSLars Ellenberg struct page *tmp; 32745bb912bSLars Ellenberg tmp = page_chain_tail(page, &i); 328b411b363SPhilipp Reisner spin_lock(&drbd_pp_lock); 32945bb912bSLars Ellenberg page_chain_add(&drbd_pp_pool, page, tmp); 33045bb912bSLars Ellenberg drbd_pp_vacant += i; 331b411b363SPhilipp Reisner spin_unlock(&drbd_pp_lock); 332b411b363SPhilipp Reisner } 333435f0740SLars Ellenberg i = atomic_sub_return(i, a); 33445bb912bSLars Ellenberg if (i < 0) 335d0180171SAndreas Gruenbacher drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", 336435f0740SLars Ellenberg is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); 337b411b363SPhilipp Reisner wake_up(&drbd_pp_wait); 338b411b363SPhilipp Reisner } 339b411b363SPhilipp Reisner 340b411b363SPhilipp Reisner /* 341b411b363SPhilipp Reisner You need to hold the req_lock: 342b411b363SPhilipp Reisner _drbd_wait_ee_list_empty() 343b411b363SPhilipp Reisner 344b411b363SPhilipp Reisner You must not have the req_lock: 3453967deb1SAndreas Gruenbacher drbd_free_peer_req() 3460db55363SAndreas Gruenbacher drbd_alloc_peer_req() 3477721f567SAndreas Gruenbacher drbd_free_peer_reqs() 348b411b363SPhilipp Reisner drbd_ee_fix_bhs() 349a990be46SAndreas Gruenbacher drbd_finish_peer_reqs() 350b411b363SPhilipp Reisner drbd_clear_done_ee() 351b411b363SPhilipp Reisner drbd_wait_ee_list_empty() 352b411b363SPhilipp Reisner */ 353b411b363SPhilipp Reisner 3549104d31aSLars Ellenberg /* normal: payload_size == request size (bi_size) 3559104d31aSLars Ellenberg * w_same: payload_size == logical_block_size 3569104d31aSLars Ellenberg * trim: payload_size == 0 */ 357f6ffca9fSAndreas Gruenbacher struct drbd_peer_request * 35869a22773SAndreas Gruenbacher drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, 3599104d31aSLars Ellenberg unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local) 360b411b363SPhilipp Reisner { 36169a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 362db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 363a73ff323SLars Ellenberg struct page *page = NULL; 3649104d31aSLars Ellenberg unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT; 365b411b363SPhilipp Reisner 366b30ab791SAndreas Gruenbacher if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) 367b411b363SPhilipp Reisner return NULL; 368b411b363SPhilipp Reisner 3690892fac8SKent Overstreet peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); 370db830c46SAndreas Gruenbacher if (!peer_req) { 371b411b363SPhilipp Reisner if (!(gfp_mask & 
__GFP_NOWARN)) 372d0180171SAndreas Gruenbacher drbd_err(device, "%s: allocation failed\n", __func__); 373b411b363SPhilipp Reisner return NULL; 374b411b363SPhilipp Reisner } 375b411b363SPhilipp Reisner 3769104d31aSLars Ellenberg if (nr_pages) { 377d0164adcSMel Gorman page = drbd_alloc_pages(peer_device, nr_pages, 378d0164adcSMel Gorman gfpflags_allow_blocking(gfp_mask)); 37945bb912bSLars Ellenberg if (!page) 38045bb912bSLars Ellenberg goto fail; 381a73ff323SLars Ellenberg } 382b411b363SPhilipp Reisner 383c5a2c150SLars Ellenberg memset(peer_req, 0, sizeof(*peer_req)); 384c5a2c150SLars Ellenberg INIT_LIST_HEAD(&peer_req->w.list); 385db830c46SAndreas Gruenbacher drbd_clear_interval(&peer_req->i); 3869104d31aSLars Ellenberg peer_req->i.size = request_size; 387db830c46SAndreas Gruenbacher peer_req->i.sector = sector; 388c5a2c150SLars Ellenberg peer_req->submit_jif = jiffies; 389a8cd15baSAndreas Gruenbacher peer_req->peer_device = peer_device; 390db830c46SAndreas Gruenbacher peer_req->pages = page; 3919a8e7753SAndreas Gruenbacher /* 3929a8e7753SAndreas Gruenbacher * The block_id is opaque to the receiver. It is not endianness 3939a8e7753SAndreas Gruenbacher * converted, and sent back to the sender unchanged. 
3949a8e7753SAndreas Gruenbacher */ 395db830c46SAndreas Gruenbacher peer_req->block_id = id; 396b411b363SPhilipp Reisner 397db830c46SAndreas Gruenbacher return peer_req; 398b411b363SPhilipp Reisner 39945bb912bSLars Ellenberg fail: 4000892fac8SKent Overstreet mempool_free(peer_req, &drbd_ee_mempool); 401b411b363SPhilipp Reisner return NULL; 402b411b363SPhilipp Reisner } 403b411b363SPhilipp Reisner 404b30ab791SAndreas Gruenbacher void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 405f6ffca9fSAndreas Gruenbacher int is_net) 406b411b363SPhilipp Reisner { 40721ae5d7fSLars Ellenberg might_sleep(); 408db830c46SAndreas Gruenbacher if (peer_req->flags & EE_HAS_DIGEST) 409db830c46SAndreas Gruenbacher kfree(peer_req->digest); 410b30ab791SAndreas Gruenbacher drbd_free_pages(device, peer_req->pages, is_net); 4110b0ba1efSAndreas Gruenbacher D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 4120b0ba1efSAndreas Gruenbacher D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 41321ae5d7fSLars Ellenberg if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { 41421ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 41521ae5d7fSLars Ellenberg drbd_al_complete_io(device, &peer_req->i); 41621ae5d7fSLars Ellenberg } 4170892fac8SKent Overstreet mempool_free(peer_req, &drbd_ee_mempool); 418b411b363SPhilipp Reisner } 419b411b363SPhilipp Reisner 420b30ab791SAndreas Gruenbacher int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) 421b411b363SPhilipp Reisner { 422b411b363SPhilipp Reisner LIST_HEAD(work_list); 423db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 424b411b363SPhilipp Reisner int count = 0; 425b30ab791SAndreas Gruenbacher int is_net = list == &device->net_ee; 426b411b363SPhilipp Reisner 4270500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 428b411b363SPhilipp Reisner list_splice_init(list, &work_list); 4290500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 430b411b363SPhilipp Reisner 431a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 432b30ab791SAndreas Gruenbacher __drbd_free_peer_req(device, peer_req, is_net); 433b411b363SPhilipp Reisner count++; 434b411b363SPhilipp Reisner } 435b411b363SPhilipp Reisner return count; 436b411b363SPhilipp Reisner } 437b411b363SPhilipp Reisner 438b411b363SPhilipp Reisner /* 439a990be46SAndreas Gruenbacher * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 440b411b363SPhilipp Reisner */ 441b30ab791SAndreas Gruenbacher static int drbd_finish_peer_reqs(struct drbd_device *device) 442b411b363SPhilipp Reisner { 443b411b363SPhilipp Reisner LIST_HEAD(work_list); 444b411b363SPhilipp Reisner LIST_HEAD(reclaimed); 445db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req, *t; 446e2b3032bSAndreas Gruenbacher int err = 0; 447b411b363SPhilipp Reisner 4480500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 449b30ab791SAndreas Gruenbacher reclaim_finished_net_peer_reqs(device, &reclaimed); 450b30ab791SAndreas Gruenbacher list_splice_init(&device->done_ee, &work_list); 4510500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 452b411b363SPhilipp Reisner 453a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) 454b30ab791SAndreas Gruenbacher drbd_free_net_peer_req(device, peer_req); 455b411b363SPhilipp Reisner 456b411b363SPhilipp Reisner /* possible callbacks here: 457d4dabbe2SLars Ellenberg * e_end_block, and e_end_resync_block, e_send_superseded. 458b411b363SPhilipp Reisner * all ignore the last argument. 
459b411b363SPhilipp Reisner */ 460a8cd15baSAndreas Gruenbacher list_for_each_entry_safe(peer_req, t, &work_list, w.list) { 461e2b3032bSAndreas Gruenbacher int err2; 462e2b3032bSAndreas Gruenbacher 463b411b363SPhilipp Reisner /* list_del not necessary, next/prev members not touched */ 464a8cd15baSAndreas Gruenbacher err2 = peer_req->w.cb(&peer_req->w, !!err); 465e2b3032bSAndreas Gruenbacher if (!err) 466e2b3032bSAndreas Gruenbacher err = err2; 467b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 468b411b363SPhilipp Reisner } 469b30ab791SAndreas Gruenbacher wake_up(&device->ee_wait); 470b411b363SPhilipp Reisner 471e2b3032bSAndreas Gruenbacher return err; 472b411b363SPhilipp Reisner } 473b411b363SPhilipp Reisner 474b30ab791SAndreas Gruenbacher static void _drbd_wait_ee_list_empty(struct drbd_device *device, 475d4da1537SAndreas Gruenbacher struct list_head *head) 476b411b363SPhilipp Reisner { 477b411b363SPhilipp Reisner DEFINE_WAIT(wait); 478b411b363SPhilipp Reisner 479b411b363SPhilipp Reisner /* avoids spin_lock/unlock 480b411b363SPhilipp Reisner * and calling prepare_to_wait in the fast path */ 481b411b363SPhilipp Reisner while (!list_empty(head)) { 482b30ab791SAndreas Gruenbacher prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE); 4830500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4847eaceaccSJens Axboe io_schedule(); 485b30ab791SAndreas Gruenbacher finish_wait(&device->ee_wait, &wait); 4860500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 487b411b363SPhilipp Reisner } 488b411b363SPhilipp Reisner } 489b411b363SPhilipp Reisner 490b30ab791SAndreas Gruenbacher static void drbd_wait_ee_list_empty(struct drbd_device *device, 491d4da1537SAndreas Gruenbacher struct list_head *head) 492b411b363SPhilipp Reisner { 4930500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 494b30ab791SAndreas Gruenbacher _drbd_wait_ee_list_empty(device, head); 4950500813fSAndreas Gruenbacher 
spin_unlock_irq(&device->resource->req_lock); 496b411b363SPhilipp Reisner } 497b411b363SPhilipp Reisner 498dbd9eea0SPhilipp Reisner static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) 499b411b363SPhilipp Reisner { 500b411b363SPhilipp Reisner struct kvec iov = { 501b411b363SPhilipp Reisner .iov_base = buf, 502b411b363SPhilipp Reisner .iov_len = size, 503b411b363SPhilipp Reisner }; 504b411b363SPhilipp Reisner struct msghdr msg = { 505b411b363SPhilipp Reisner .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) 506b411b363SPhilipp Reisner }; 507aa563d7bSDavid Howells iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); 508f7765c36SAl Viro return sock_recvmsg(sock, &msg, msg.msg_flags); 509b411b363SPhilipp Reisner } 510b411b363SPhilipp Reisner 511bde89a9eSAndreas Gruenbacher static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size) 512b411b363SPhilipp Reisner { 513b411b363SPhilipp Reisner int rv; 514b411b363SPhilipp Reisner 515bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->data.socket, buf, size, 0); 516b411b363SPhilipp Reisner 517b411b363SPhilipp Reisner if (rv < 0) { 518b411b363SPhilipp Reisner if (rv == -ECONNRESET) 5191ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was reset by peer\n"); 520b411b363SPhilipp Reisner else if (rv != -ERESTARTSYS) 5211ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 522b411b363SPhilipp Reisner } else if (rv == 0) { 523bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 524b66623e3SPhilipp Reisner long t; 525b66623e3SPhilipp Reisner rcu_read_lock(); 526bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 527b66623e3SPhilipp Reisner rcu_read_unlock(); 528b66623e3SPhilipp Reisner 529bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t); 530b66623e3SPhilipp Reisner 531599377acSPhilipp Reisner 
if (t) 532599377acSPhilipp Reisner goto out; 533599377acSPhilipp Reisner } 5341ec861ebSAndreas Gruenbacher drbd_info(connection, "sock was shut down by peer\n"); 535599377acSPhilipp Reisner } 536599377acSPhilipp Reisner 537b411b363SPhilipp Reisner if (rv != size) 538bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD); 539b411b363SPhilipp Reisner 540599377acSPhilipp Reisner out: 541b411b363SPhilipp Reisner return rv; 542b411b363SPhilipp Reisner } 543b411b363SPhilipp Reisner 544bde89a9eSAndreas Gruenbacher static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size) 545c6967746SAndreas Gruenbacher { 546c6967746SAndreas Gruenbacher int err; 547c6967746SAndreas Gruenbacher 548bde89a9eSAndreas Gruenbacher err = drbd_recv(connection, buf, size); 549c6967746SAndreas Gruenbacher if (err != size) { 550c6967746SAndreas Gruenbacher if (err >= 0) 551c6967746SAndreas Gruenbacher err = -EIO; 552c6967746SAndreas Gruenbacher } else 553c6967746SAndreas Gruenbacher err = 0; 554c6967746SAndreas Gruenbacher return err; 555c6967746SAndreas Gruenbacher } 556c6967746SAndreas Gruenbacher 557bde89a9eSAndreas Gruenbacher static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size) 558a5c31904SAndreas Gruenbacher { 559a5c31904SAndreas Gruenbacher int err; 560a5c31904SAndreas Gruenbacher 561bde89a9eSAndreas Gruenbacher err = drbd_recv_all(connection, buf, size); 562a5c31904SAndreas Gruenbacher if (err && !signal_pending(current)) 5631ec861ebSAndreas Gruenbacher drbd_warn(connection, "short read (expected size %d)\n", (int)size); 564a5c31904SAndreas Gruenbacher return err; 565a5c31904SAndreas Gruenbacher } 566a5c31904SAndreas Gruenbacher 5675dbf1673SLars Ellenberg /* quoting tcp(7): 5685dbf1673SLars Ellenberg * On individual connections, the socket buffer size must be set prior to the 5695dbf1673SLars Ellenberg * listen(2) or connect(2) calls in order to have it take effect. 
* NOTE(review): blame-annotated text; code kept byte-identical.
* drbd_setbufsize() below open-codes SO_SNDBUF/SO_RCVBUF (sets sk buffer
* sizes and the corresponding SOCK_*_LOCK userlocks). drbd_try_connect()
* snapshots net_conf under RCU, binds to the configured source address with
* port 0 (kernel picks an ephemeral port), then connects; "soft" errno
* values (timeout, refused, unreachable, ...) deliberately do NOT force
* C_DISCONNECTING so the state machine stays in C_WF_CONNECTION and retries.
5705dbf1673SLars Ellenberg * This is our wrapper to do so. 5715dbf1673SLars Ellenberg */ 5725dbf1673SLars Ellenberg static void drbd_setbufsize(struct socket *sock, unsigned int snd, 5735dbf1673SLars Ellenberg unsigned int rcv) 5745dbf1673SLars Ellenberg { 5755dbf1673SLars Ellenberg /* open coded SO_SNDBUF, SO_RCVBUF */ 5765dbf1673SLars Ellenberg if (snd) { 5775dbf1673SLars Ellenberg sock->sk->sk_sndbuf = snd; 5785dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5795dbf1673SLars Ellenberg } 5805dbf1673SLars Ellenberg if (rcv) { 5815dbf1673SLars Ellenberg sock->sk->sk_rcvbuf = rcv; 5825dbf1673SLars Ellenberg sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5835dbf1673SLars Ellenberg } 5845dbf1673SLars Ellenberg } 5855dbf1673SLars Ellenberg 586bde89a9eSAndreas Gruenbacher static struct socket *drbd_try_connect(struct drbd_connection *connection) 587b411b363SPhilipp Reisner { 588b411b363SPhilipp Reisner const char *what; 589b411b363SPhilipp Reisner struct socket *sock; 590b411b363SPhilipp Reisner struct sockaddr_in6 src_in6; 59144ed167dSPhilipp Reisner struct sockaddr_in6 peer_in6; 59244ed167dSPhilipp Reisner struct net_conf *nc; 59344ed167dSPhilipp Reisner int err, peer_addr_len, my_addr_len; 59469ef82deSAndreas Gruenbacher int sndbuf_size, rcvbuf_size, connect_int; 595b411b363SPhilipp Reisner int disconnect_on_error = 1; 596b411b363SPhilipp Reisner 59744ed167dSPhilipp Reisner rcu_read_lock(); 598bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 59944ed167dSPhilipp Reisner if (!nc) { 60044ed167dSPhilipp Reisner rcu_read_unlock(); 601b411b363SPhilipp Reisner return NULL; 60244ed167dSPhilipp Reisner } 60344ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 60444ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 60569ef82deSAndreas Gruenbacher connect_int = nc->connect_int; 606089c075dSAndreas Gruenbacher rcu_read_unlock(); 60744ed167dSPhilipp Reisner 608bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int,
connection->my_addr_len, sizeof(src_in6)); 609bde89a9eSAndreas Gruenbacher memcpy(&src_in6, &connection->my_addr, my_addr_len); 61044ed167dSPhilipp Reisner 611bde89a9eSAndreas Gruenbacher if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6) 61244ed167dSPhilipp Reisner src_in6.sin6_port = 0; 61344ed167dSPhilipp Reisner else 61444ed167dSPhilipp Reisner ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ 61544ed167dSPhilipp Reisner 616bde89a9eSAndreas Gruenbacher peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6)); 617bde89a9eSAndreas Gruenbacher memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); 618b411b363SPhilipp Reisner 619b411b363SPhilipp Reisner what = "sock_create_kern"; 620eeb1bd5cSEric W. Biederman err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, 621b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &sock); 622b411b363SPhilipp Reisner if (err < 0) { 623b411b363SPhilipp Reisner sock = NULL; 624b411b363SPhilipp Reisner goto out; 625b411b363SPhilipp Reisner } 626b411b363SPhilipp Reisner 627b411b363SPhilipp Reisner sock->sk->sk_rcvtimeo = 62869ef82deSAndreas Gruenbacher sock->sk->sk_sndtimeo = connect_int * HZ; 62944ed167dSPhilipp Reisner drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); 630b411b363SPhilipp Reisner 631b411b363SPhilipp Reisner /* explicitly bind to the configured IP as source IP 632b411b363SPhilipp Reisner * for the outgoing connections. 633b411b363SPhilipp Reisner * This is needed for multihomed hosts and to be 634b411b363SPhilipp Reisner * able to use lo: interfaces for drbd. 635b411b363SPhilipp Reisner * Make sure to use 0 as port number, so linux selects 636b411b363SPhilipp Reisner * a free one dynamically.
637b411b363SPhilipp Reisner */ 638b411b363SPhilipp Reisner what = "bind before connect"; 63944ed167dSPhilipp Reisner err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); 640b411b363SPhilipp Reisner if (err < 0) 641b411b363SPhilipp Reisner goto out; 642b411b363SPhilipp Reisner 643b411b363SPhilipp Reisner /* connect may fail, peer not yet available. 644b411b363SPhilipp Reisner * stay C_WF_CONNECTION, don't go Disconnecting! */ 645b411b363SPhilipp Reisner disconnect_on_error = 0; 646b411b363SPhilipp Reisner what = "connect"; 64744ed167dSPhilipp Reisner err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); 648b411b363SPhilipp Reisner 649b411b363SPhilipp Reisner out: 650b411b363SPhilipp Reisner if (err < 0) { 651b411b363SPhilipp Reisner if (sock) { 652b411b363SPhilipp Reisner sock_release(sock); 653b411b363SPhilipp Reisner sock = NULL; 654b411b363SPhilipp Reisner } 655b411b363SPhilipp Reisner switch (-err) { 656b411b363SPhilipp Reisner /* timeout, busy, signal pending */ 657b411b363SPhilipp Reisner case ETIMEDOUT: case EAGAIN: case EINPROGRESS: 658b411b363SPhilipp Reisner case EINTR: case ERESTARTSYS: 659b411b363SPhilipp Reisner /* peer not (yet) available, network problem */ 660b411b363SPhilipp Reisner case ECONNREFUSED: case ENETUNREACH: 661b411b363SPhilipp Reisner case EHOSTDOWN: case EHOSTUNREACH: 662b411b363SPhilipp Reisner disconnect_on_error = 0; 663b411b363SPhilipp Reisner break; 664b411b363SPhilipp Reisner default: 6651ec861ebSAndreas Gruenbacher drbd_err(connection, "%s failed, err = %d\n", what, err); 666b411b363SPhilipp Reisner } 667b411b363SPhilipp Reisner if (disconnect_on_error) 668bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 669b411b363SPhilipp Reisner } 67044ed167dSPhilipp Reisner 671b411b363SPhilipp Reisner return sock; 672b411b363SPhilipp Reisner } 673b411b363SPhilipp Reisner 6747a426fd8SPhilipp Reisner struct accept_wait_data { 675bde89a9eSAndreas
/* NOTE(review): blame-annotated text; code kept byte-identical.
 * Remainder of struct accept_wait_data: door_bell is completed by
 * drbd_incoming_connection() (installed as sk_state_change) once the listen
 * socket reaches TCP_ESTABLISHED; the original callback is saved so
 * unregister_state_change() can restore it under sk_callback_lock.
 * prepare_listen_socket() creates, binds and listens (backlog 5), hooking
 * the callback in between; on failure it releases the socket, logs unless
 * the error is a benign -EAGAIN/-EINTR/-ERESTARTSYS, and returns -EIO. */
Gruenbacher struct drbd_connection *connection; 6767a426fd8SPhilipp Reisner struct socket *s_listen; 6777a426fd8SPhilipp Reisner struct completion door_bell; 6787a426fd8SPhilipp Reisner void (*original_sk_state_change)(struct sock *sk); 6797a426fd8SPhilipp Reisner 6807a426fd8SPhilipp Reisner }; 6817a426fd8SPhilipp Reisner 682715306f6SAndreas Gruenbacher static void drbd_incoming_connection(struct sock *sk) 683b411b363SPhilipp Reisner { 6847a426fd8SPhilipp Reisner struct accept_wait_data *ad = sk->sk_user_data; 685715306f6SAndreas Gruenbacher void (*state_change)(struct sock *sk); 6867a426fd8SPhilipp Reisner 687715306f6SAndreas Gruenbacher state_change = ad->original_sk_state_change; 688715306f6SAndreas Gruenbacher if (sk->sk_state == TCP_ESTABLISHED) 6897a426fd8SPhilipp Reisner complete(&ad->door_bell); 690715306f6SAndreas Gruenbacher state_change(sk); 6917a426fd8SPhilipp Reisner } 6927a426fd8SPhilipp Reisner 693bde89a9eSAndreas Gruenbacher static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad) 694b411b363SPhilipp Reisner { 6951f3e509bSPhilipp Reisner int err, sndbuf_size, rcvbuf_size, my_addr_len; 69644ed167dSPhilipp Reisner struct sockaddr_in6 my_addr; 6971f3e509bSPhilipp Reisner struct socket *s_listen; 69844ed167dSPhilipp Reisner struct net_conf *nc; 699b411b363SPhilipp Reisner const char *what; 700b411b363SPhilipp Reisner 70144ed167dSPhilipp Reisner rcu_read_lock(); 702bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 70344ed167dSPhilipp Reisner if (!nc) { 70444ed167dSPhilipp Reisner rcu_read_unlock(); 7057a426fd8SPhilipp Reisner return -EIO; 70644ed167dSPhilipp Reisner } 70744ed167dSPhilipp Reisner sndbuf_size = nc->sndbuf_size; 70844ed167dSPhilipp Reisner rcvbuf_size = nc->rcvbuf_size; 70944ed167dSPhilipp Reisner rcu_read_unlock(); 710b411b363SPhilipp Reisner 711bde89a9eSAndreas Gruenbacher my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6)); 712bde89a9eSAndreas
Gruenbacher memcpy(&my_addr, &connection->my_addr, my_addr_len); 713b411b363SPhilipp Reisner 714b411b363SPhilipp Reisner what = "sock_create_kern"; 715eeb1bd5cSEric W. Biederman err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, 716b411b363SPhilipp Reisner SOCK_STREAM, IPPROTO_TCP, &s_listen); 717b411b363SPhilipp Reisner if (err) { 718b411b363SPhilipp Reisner s_listen = NULL; 719b411b363SPhilipp Reisner goto out; 720b411b363SPhilipp Reisner } 721b411b363SPhilipp Reisner 7224a17fd52SPavel Emelyanov s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 72344ed167dSPhilipp Reisner drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); 724b411b363SPhilipp Reisner 725b411b363SPhilipp Reisner what = "bind before listen"; 72644ed167dSPhilipp Reisner err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); 727b411b363SPhilipp Reisner if (err < 0) 728b411b363SPhilipp Reisner goto out; 729b411b363SPhilipp Reisner 7307a426fd8SPhilipp Reisner ad->s_listen = s_listen; 7317a426fd8SPhilipp Reisner write_lock_bh(&s_listen->sk->sk_callback_lock); 7327a426fd8SPhilipp Reisner ad->original_sk_state_change = s_listen->sk->sk_state_change; 733715306f6SAndreas Gruenbacher s_listen->sk->sk_state_change = drbd_incoming_connection; 7347a426fd8SPhilipp Reisner s_listen->sk->sk_user_data = ad; 7357a426fd8SPhilipp Reisner write_unlock_bh(&s_listen->sk->sk_callback_lock); 736b411b363SPhilipp Reisner 7372820fd39SPhilipp Reisner what = "listen"; 7382820fd39SPhilipp Reisner err = s_listen->ops->listen(s_listen, 5); 7392820fd39SPhilipp Reisner if (err < 0) 7402820fd39SPhilipp Reisner goto out; 7412820fd39SPhilipp Reisner 7427a426fd8SPhilipp Reisner return 0; 743b411b363SPhilipp Reisner out: 744b411b363SPhilipp Reisner if (s_listen) 745b411b363SPhilipp Reisner sock_release(s_listen); 746b411b363SPhilipp Reisner if (err < 0) { 747b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7481ec861ebSAndreas Gruenbacher
drbd_err(connection, "%s failed, err = %d\n", what, err); 749bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 750b411b363SPhilipp Reisner } 751b411b363SPhilipp Reisner } 7521f3e509bSPhilipp Reisner 7537a426fd8SPhilipp Reisner return -EIO; 7541f3e509bSPhilipp Reisner } 7551f3e509bSPhilipp Reisner 756715306f6SAndreas Gruenbacher static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad) 757715306f6SAndreas Gruenbacher { 758715306f6SAndreas Gruenbacher write_lock_bh(&sk->sk_callback_lock); 759715306f6SAndreas Gruenbacher sk->sk_state_change = ad->original_sk_state_change; 760715306f6SAndreas Gruenbacher sk->sk_user_data = NULL; 761715306f6SAndreas Gruenbacher write_unlock_bh(&sk->sk_callback_lock); 762715306f6SAndreas Gruenbacher } 763715306f6SAndreas Gruenbacher 764bde89a9eSAndreas Gruenbacher static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad) 7651f3e509bSPhilipp Reisner { 7661f3e509bSPhilipp Reisner int timeo, connect_int, err = 0; 7671f3e509bSPhilipp Reisner struct socket *s_estab = NULL; 7681f3e509bSPhilipp Reisner struct net_conf *nc; 7691f3e509bSPhilipp Reisner 7701f3e509bSPhilipp Reisner rcu_read_lock(); 771bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 7721f3e509bSPhilipp Reisner if (!nc) { 7731f3e509bSPhilipp Reisner rcu_read_unlock(); 7741f3e509bSPhilipp Reisner return NULL; 7751f3e509bSPhilipp Reisner } 7761f3e509bSPhilipp Reisner connect_int = nc->connect_int; 7771f3e509bSPhilipp Reisner rcu_read_unlock(); 7781f3e509bSPhilipp Reisner 7791f3e509bSPhilipp Reisner timeo = connect_int * HZ; 78038b682b2SAkinobu Mita /* 28.5% random jitter */ 78138b682b2SAkinobu Mita timeo += (prandom_u32() & 1) ?
/* NOTE(review): blame-annotated text; code kept byte-identical.
 * Continuation of drbd_wait_for_connect(): apply +/- (timeo/7) jitter, wait
 * on the door_bell completion, then kernel_accept() on the prepared listen
 * socket (restoring sk_state_change on the accepted socket).  Also in this
 * chunk: send_first_packet()/receive_first_packet() exchange the
 * P_INITIAL_* handshake packets, drbd_socket_okay() probes a socket with a
 * MSG_DONTWAIT|MSG_PEEK read and frees it when dead, and
 * connection_established() sleeps briefly and re-checks both sockets. */
timeo / 7 : -timeo / 7; 7821f3e509bSPhilipp Reisner 7837a426fd8SPhilipp Reisner err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo); 7847a426fd8SPhilipp Reisner if (err <= 0) 7857a426fd8SPhilipp Reisner return NULL; 7861f3e509bSPhilipp Reisner 7877a426fd8SPhilipp Reisner err = kernel_accept(ad->s_listen, &s_estab, 0); 788b411b363SPhilipp Reisner if (err < 0) { 789b411b363SPhilipp Reisner if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { 7901ec861ebSAndreas Gruenbacher drbd_err(connection, "accept failed, err = %d\n", err); 791bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 792b411b363SPhilipp Reisner } 793b411b363SPhilipp Reisner } 794b411b363SPhilipp Reisner 795715306f6SAndreas Gruenbacher if (s_estab) 796715306f6SAndreas Gruenbacher unregister_state_change(s_estab->sk, ad); 797b411b363SPhilipp Reisner 798b411b363SPhilipp Reisner return s_estab; 799b411b363SPhilipp Reisner } 800b411b363SPhilipp Reisner 801bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *, void *, struct packet_info *); 802b411b363SPhilipp Reisner 803bde89a9eSAndreas Gruenbacher static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock, 8049f5bdc33SAndreas Gruenbacher enum drbd_packet cmd) 8059f5bdc33SAndreas Gruenbacher { 806bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) 8079f5bdc33SAndreas Gruenbacher return -EIO; 808bde89a9eSAndreas Gruenbacher return conn_send_command(connection, sock, cmd, 0, NULL, 0); 809b411b363SPhilipp Reisner } 810b411b363SPhilipp Reisner 811bde89a9eSAndreas Gruenbacher static int receive_first_packet(struct drbd_connection *connection, struct socket *sock) 812b411b363SPhilipp Reisner { 813bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 8149f5bdc33SAndreas Gruenbacher struct packet_info pi; 8154920e37aSPhilipp Reisner struct net_conf *nc; 8169f5bdc33SAndreas Gruenbacher
int err; 817b411b363SPhilipp Reisner 8184920e37aSPhilipp Reisner rcu_read_lock(); 8194920e37aSPhilipp Reisner nc = rcu_dereference(connection->net_conf); 8204920e37aSPhilipp Reisner if (!nc) { 8214920e37aSPhilipp Reisner rcu_read_unlock(); 8224920e37aSPhilipp Reisner return -EIO; 8234920e37aSPhilipp Reisner } 8244920e37aSPhilipp Reisner sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; 8254920e37aSPhilipp Reisner rcu_read_unlock(); 8264920e37aSPhilipp Reisner 827bde89a9eSAndreas Gruenbacher err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 8289f5bdc33SAndreas Gruenbacher if (err != header_size) { 8299f5bdc33SAndreas Gruenbacher if (err >= 0) 8309f5bdc33SAndreas Gruenbacher err = -EIO; 8319f5bdc33SAndreas Gruenbacher return err; 8329f5bdc33SAndreas Gruenbacher } 833bde89a9eSAndreas Gruenbacher err = decode_header(connection, connection->data.rbuf, &pi); 8349f5bdc33SAndreas Gruenbacher if (err) 8359f5bdc33SAndreas Gruenbacher return err; 8369f5bdc33SAndreas Gruenbacher return pi.cmd; 837b411b363SPhilipp Reisner } 838b411b363SPhilipp Reisner 839b411b363SPhilipp Reisner /** 840b411b363SPhilipp Reisner * drbd_socket_okay() - Free the socket if its connection is not okay 841b411b363SPhilipp Reisner * @sock: pointer to the pointer to the socket.
842b411b363SPhilipp Reisner */ 8435d0b17f1SPhilipp Reisner static bool drbd_socket_okay(struct socket **sock) 844b411b363SPhilipp Reisner { 845b411b363SPhilipp Reisner int rr; 846b411b363SPhilipp Reisner char tb[4]; 847b411b363SPhilipp Reisner 848b411b363SPhilipp Reisner if (!*sock) 84981e84650SAndreas Gruenbacher return false; 850b411b363SPhilipp Reisner 851dbd9eea0SPhilipp Reisner rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); 852b411b363SPhilipp Reisner 853b411b363SPhilipp Reisner if (rr > 0 || rr == -EAGAIN) { 85481e84650SAndreas Gruenbacher return true; 855b411b363SPhilipp Reisner } else { 856b411b363SPhilipp Reisner sock_release(*sock); 857b411b363SPhilipp Reisner *sock = NULL; 85881e84650SAndreas Gruenbacher return false; 859b411b363SPhilipp Reisner } 860b411b363SPhilipp Reisner } 8615d0b17f1SPhilipp Reisner 8625d0b17f1SPhilipp Reisner static bool connection_established(struct drbd_connection *connection, 8635d0b17f1SPhilipp Reisner struct socket **sock1, 8645d0b17f1SPhilipp Reisner struct socket **sock2) 8655d0b17f1SPhilipp Reisner { 8665d0b17f1SPhilipp Reisner struct net_conf *nc; 8675d0b17f1SPhilipp Reisner int timeout; 8685d0b17f1SPhilipp Reisner bool ok; 8695d0b17f1SPhilipp Reisner 8705d0b17f1SPhilipp Reisner if (!*sock1 || !*sock2) 8715d0b17f1SPhilipp Reisner return false; 8725d0b17f1SPhilipp Reisner 8735d0b17f1SPhilipp Reisner rcu_read_lock(); 8745d0b17f1SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 8755d0b17f1SPhilipp Reisner timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; 8765d0b17f1SPhilipp Reisner rcu_read_unlock(); 8775d0b17f1SPhilipp Reisner schedule_timeout_interruptible(timeout); 8785d0b17f1SPhilipp Reisner 8795d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock1); 8805d0b17f1SPhilipp Reisner ok = drbd_socket_okay(sock2) && ok; 8815d0b17f1SPhilipp Reisner 8825d0b17f1SPhilipp Reisner return ok; 8835d0b17f1SPhilipp Reisner } 8845d0b17f1SPhilipp Reisner 8852325eb66SPhilipp Reisner /* Gets called if a
* NOTE(review): blame-annotated text; code kept byte-identical.
* drbd_connected() sends sync-param/sizes/uuids/current-state for one peer
* device and picks the state mutex by agreed protocol version.
* conn_connect() below establishes the data+meta socket pair (return values
* documented just below: 1 ok, 0 retry, -1 incompatible peer, -2 no net
* config), then runs the feature and optional CRAM-HMAC auth handshakes.
* decode_header() parses h80/h95/h100 wire headers into struct packet_info.
connection is established, or if a new minor gets created 8862325eb66SPhilipp Reisner in a connection */ 88769a22773SAndreas Gruenbacher int drbd_connected(struct drbd_peer_device *peer_device) 888907599e0SPhilipp Reisner { 88969a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 8900829f5edSAndreas Gruenbacher int err; 891907599e0SPhilipp Reisner 892b30ab791SAndreas Gruenbacher atomic_set(&device->packet_seq, 0); 893b30ab791SAndreas Gruenbacher device->peer_seq = 0; 894907599e0SPhilipp Reisner 89569a22773SAndreas Gruenbacher device->state_mutex = peer_device->connection->agreed_pro_version < 100 ? 89669a22773SAndreas Gruenbacher &peer_device->connection->cstate_mutex : 897b30ab791SAndreas Gruenbacher &device->own_state_mutex; 8988410da8fSPhilipp Reisner 89969a22773SAndreas Gruenbacher err = drbd_send_sync_param(peer_device); 9000829f5edSAndreas Gruenbacher if (!err) 90169a22773SAndreas Gruenbacher err = drbd_send_sizes(peer_device, 0, 0); 9020829f5edSAndreas Gruenbacher if (!err) 90369a22773SAndreas Gruenbacher err = drbd_send_uuids(peer_device); 9040829f5edSAndreas Gruenbacher if (!err) 90569a22773SAndreas Gruenbacher err = drbd_send_current_state(peer_device); 906b30ab791SAndreas Gruenbacher clear_bit(USE_DEGR_WFC_T, &device->flags); 907b30ab791SAndreas Gruenbacher clear_bit(RESIZE_PENDING, &device->flags); 908b30ab791SAndreas Gruenbacher atomic_set(&device->ap_in_flight, 0); 909b30ab791SAndreas Gruenbacher mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */ 9100829f5edSAndreas Gruenbacher return err; 911907599e0SPhilipp Reisner } 912b411b363SPhilipp Reisner 913b411b363SPhilipp Reisner /* 914b411b363SPhilipp Reisner * return values: 915b411b363SPhilipp Reisner * 1 yes, we have a valid connection 916b411b363SPhilipp Reisner * 0 oops, did not work out, please try again 917b411b363SPhilipp Reisner * -1 peer talks different language, 918b411b363SPhilipp Reisner * no point in trying again, please go standalone.
919b411b363SPhilipp Reisner * -2 We do not have a network config... 920b411b363SPhilipp Reisner */ 921bde89a9eSAndreas Gruenbacher static int conn_connect(struct drbd_connection *connection) 922b411b363SPhilipp Reisner { 9237da35862SPhilipp Reisner struct drbd_socket sock, msock; 924c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 92544ed167dSPhilipp Reisner struct net_conf *nc; 9265d0b17f1SPhilipp Reisner int vnr, timeout, h; 9275d0b17f1SPhilipp Reisner bool discard_my_data, ok; 928197296ffSPhilipp Reisner enum drbd_state_rv rv; 9297a426fd8SPhilipp Reisner struct accept_wait_data ad = { 930bde89a9eSAndreas Gruenbacher .connection = connection, 9317a426fd8SPhilipp Reisner .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), 9327a426fd8SPhilipp Reisner }; 933b411b363SPhilipp Reisner 934bde89a9eSAndreas Gruenbacher clear_bit(DISCONNECT_SENT, &connection->flags); 935bde89a9eSAndreas Gruenbacher if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) 936b411b363SPhilipp Reisner return -2; 937b411b363SPhilipp Reisner 9387da35862SPhilipp Reisner mutex_init(&sock.mutex); 939bde89a9eSAndreas Gruenbacher sock.sbuf = connection->data.sbuf; 940bde89a9eSAndreas Gruenbacher sock.rbuf = connection->data.rbuf; 9417da35862SPhilipp Reisner sock.socket = NULL; 9427da35862SPhilipp Reisner mutex_init(&msock.mutex); 943bde89a9eSAndreas Gruenbacher msock.sbuf = connection->meta.sbuf; 944bde89a9eSAndreas Gruenbacher msock.rbuf = connection->meta.rbuf; 9457da35862SPhilipp Reisner msock.socket = NULL; 9467da35862SPhilipp Reisner 9470916e0e3SAndreas Gruenbacher /* Assume that the peer only understands protocol 80 until we know better.
*/ 948bde89a9eSAndreas Gruenbacher connection->agreed_pro_version = 80; 949b411b363SPhilipp Reisner 950bde89a9eSAndreas Gruenbacher if (prepare_listen_socket(connection, &ad)) 9517a426fd8SPhilipp Reisner return 0; 952b411b363SPhilipp Reisner 953b411b363SPhilipp Reisner do { 9542bf89621SAndreas Gruenbacher struct socket *s; 955b411b363SPhilipp Reisner 956bde89a9eSAndreas Gruenbacher s = drbd_try_connect(connection); 957b411b363SPhilipp Reisner if (s) { 9587da35862SPhilipp Reisner if (!sock.socket) { 9597da35862SPhilipp Reisner sock.socket = s; 960bde89a9eSAndreas Gruenbacher send_first_packet(connection, &sock, P_INITIAL_DATA); 9617da35862SPhilipp Reisner } else if (!msock.socket) { 962bde89a9eSAndreas Gruenbacher clear_bit(RESOLVE_CONFLICTS, &connection->flags); 9637da35862SPhilipp Reisner msock.socket = s; 964bde89a9eSAndreas Gruenbacher send_first_packet(connection, &msock, P_INITIAL_META); 965b411b363SPhilipp Reisner } else { 9661ec861ebSAndreas Gruenbacher drbd_err(connection, "Logic error in conn_connect()\n"); 967b411b363SPhilipp Reisner goto out_release_sockets; 968b411b363SPhilipp Reisner } 969b411b363SPhilipp Reisner } 970b411b363SPhilipp Reisner 9715d0b17f1SPhilipp Reisner if (connection_established(connection, &sock.socket, &msock.socket)) 972b411b363SPhilipp Reisner break; 973b411b363SPhilipp Reisner 974b411b363SPhilipp Reisner retry: 975bde89a9eSAndreas Gruenbacher s = drbd_wait_for_connect(connection, &ad); 976b411b363SPhilipp Reisner if (s) { 977bde89a9eSAndreas Gruenbacher int fp = receive_first_packet(connection, s); 9787da35862SPhilipp Reisner drbd_socket_okay(&sock.socket); 9797da35862SPhilipp Reisner drbd_socket_okay(&msock.socket); 98092f14951SPhilipp Reisner switch (fp) { 981e5d6f33aSAndreas Gruenbacher case P_INITIAL_DATA: 9827da35862SPhilipp Reisner if (sock.socket) { 9831ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet S crossed\n"); 9847da35862SPhilipp Reisner sock_release(sock.socket); 98580c6eed4SPhilipp Reisner
sock.socket = s; 98680c6eed4SPhilipp Reisner goto randomize; 987b411b363SPhilipp Reisner } 9887da35862SPhilipp Reisner sock.socket = s; 989b411b363SPhilipp Reisner break; 990e5d6f33aSAndreas Gruenbacher case P_INITIAL_META: 991bde89a9eSAndreas Gruenbacher set_bit(RESOLVE_CONFLICTS, &connection->flags); 9927da35862SPhilipp Reisner if (msock.socket) { 9931ec861ebSAndreas Gruenbacher drbd_warn(connection, "initial packet M crossed\n"); 9947da35862SPhilipp Reisner sock_release(msock.socket); 99580c6eed4SPhilipp Reisner msock.socket = s; 99680c6eed4SPhilipp Reisner goto randomize; 997b411b363SPhilipp Reisner } 9987da35862SPhilipp Reisner msock.socket = s; 999b411b363SPhilipp Reisner break; 1000b411b363SPhilipp Reisner default: 10011ec861ebSAndreas Gruenbacher drbd_warn(connection, "Error receiving initial packet\n"); 1002b411b363SPhilipp Reisner sock_release(s); 100380c6eed4SPhilipp Reisner randomize: 100438b682b2SAkinobu Mita if (prandom_u32() & 1) 1005b411b363SPhilipp Reisner goto retry; 1006b411b363SPhilipp Reisner } 1007b411b363SPhilipp Reisner } 1008b411b363SPhilipp Reisner 1009bde89a9eSAndreas Gruenbacher if (connection->cstate <= C_DISCONNECTING) 1010b411b363SPhilipp Reisner goto out_release_sockets; 1011b411b363SPhilipp Reisner if (signal_pending(current)) { 1012b411b363SPhilipp Reisner flush_signals(current); 1013b411b363SPhilipp Reisner smp_rmb(); 1014bde89a9eSAndreas Gruenbacher if (get_t_state(&connection->receiver) == EXITING) 1015b411b363SPhilipp Reisner goto out_release_sockets; 1016b411b363SPhilipp Reisner } 1017b411b363SPhilipp Reisner 10185d0b17f1SPhilipp Reisner ok = connection_established(connection, &sock.socket, &msock.socket); 1019b666dbf8SPhilipp Reisner } while (!ok); 1020b411b363SPhilipp Reisner 10217a426fd8SPhilipp Reisner if (ad.s_listen) 10227a426fd8SPhilipp Reisner sock_release(ad.s_listen); 1023b411b363SPhilipp Reisner 102498683650SPhilipp Reisner sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 102598683650SPhilipp Reisner
msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */ 1026b411b363SPhilipp Reisner 10277da35862SPhilipp Reisner sock.socket->sk->sk_allocation = GFP_NOIO; 10287da35862SPhilipp Reisner msock.socket->sk->sk_allocation = GFP_NOIO; 1029b411b363SPhilipp Reisner 10307da35862SPhilipp Reisner sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; 10317da35862SPhilipp Reisner msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; 1032b411b363SPhilipp Reisner 1033b411b363SPhilipp Reisner /* NOT YET ... 1034bde89a9eSAndreas Gruenbacher * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10; 10357da35862SPhilipp Reisner * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 10366038178eSAndreas Gruenbacher * first set it to the P_CONNECTION_FEATURES timeout, 1037b411b363SPhilipp Reisner * which we set to 4x the configured ping_timeout. */ 103844ed167dSPhilipp Reisner rcu_read_lock(); 1039bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 1040b411b363SPhilipp Reisner 10417da35862SPhilipp Reisner sock.socket->sk->sk_sndtimeo = 10427da35862SPhilipp Reisner sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; 104344ed167dSPhilipp Reisner 10447da35862SPhilipp Reisner msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; 104544ed167dSPhilipp Reisner timeout = nc->timeout * HZ / 10; 104608b165baSPhilipp Reisner discard_my_data = nc->discard_my_data; 104744ed167dSPhilipp Reisner rcu_read_unlock(); 104844ed167dSPhilipp Reisner 10497da35862SPhilipp Reisner msock.socket->sk->sk_sndtimeo = timeout; 1050b411b363SPhilipp Reisner 1051b411b363SPhilipp Reisner /* we don't want delays.
105225985edcSLucas De Marchi * we use TCP_CORK where appropriate, though */ 10537da35862SPhilipp Reisner drbd_tcp_nodelay(sock.socket); 10547da35862SPhilipp Reisner drbd_tcp_nodelay(msock.socket); 1055b411b363SPhilipp Reisner 1056bde89a9eSAndreas Gruenbacher connection->data.socket = sock.socket; 1057bde89a9eSAndreas Gruenbacher connection->meta.socket = msock.socket; 1058bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 1059b411b363SPhilipp Reisner 1060bde89a9eSAndreas Gruenbacher h = drbd_do_features(connection); 1061b411b363SPhilipp Reisner if (h <= 0) 1062b411b363SPhilipp Reisner return h; 1063b411b363SPhilipp Reisner 1064bde89a9eSAndreas Gruenbacher if (connection->cram_hmac_tfm) { 1065b30ab791SAndreas Gruenbacher /* drbd_request_state(device, NS(conn, WFAuth)); */ 1066bde89a9eSAndreas Gruenbacher switch (drbd_do_auth(connection)) { 1067b10d96cbSJohannes Thoma case -1: 10681ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed\n"); 1069b411b363SPhilipp Reisner return -1; 1070b10d96cbSJohannes Thoma case 0: 10711ec861ebSAndreas Gruenbacher drbd_err(connection, "Authentication of peer failed, trying again.\n"); 1072b10d96cbSJohannes Thoma return 0; 1073b411b363SPhilipp Reisner } 1074b411b363SPhilipp Reisner } 1075b411b363SPhilipp Reisner 1076bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_sndtimeo = timeout; 1077bde89a9eSAndreas Gruenbacher connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1078b411b363SPhilipp Reisner 1079bde89a9eSAndreas Gruenbacher if (drbd_send_protocol(connection) == -EOPNOTSUPP) 10807e2455c1SPhilipp Reisner return -1; 10811e86ac48SPhilipp Reisner 108213c76abaSPhilipp Reisner /* Prevent a race between resync-handshake and 108313c76abaSPhilipp Reisner * being promoted to Primary.
108413c76abaSPhilipp Reisner * 108513c76abaSPhilipp Reisner * Grab and release the state mutex, so we know that any current 108613c76abaSPhilipp Reisner * drbd_set_role() is finished, and any incoming drbd_set_role 108713c76abaSPhilipp Reisner * will see the STATE_SENT flag, and wait for it to be cleared. 108813c76abaSPhilipp Reisner */ 108931007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 109031007745SPhilipp Reisner mutex_lock(peer_device->device->state_mutex); 109131007745SPhilipp Reisner 1092cde81d99SLars Ellenberg /* avoid a race with conn_request_state( C_DISCONNECTING ) */ 1093cde81d99SLars Ellenberg spin_lock_irq(&connection->resource->req_lock); 109431007745SPhilipp Reisner set_bit(STATE_SENT, &connection->flags); 1095cde81d99SLars Ellenberg spin_unlock_irq(&connection->resource->req_lock); 109631007745SPhilipp Reisner 109731007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 109831007745SPhilipp Reisner mutex_unlock(peer_device->device->state_mutex); 109931007745SPhilipp Reisner 110031007745SPhilipp Reisner rcu_read_lock(); 110131007745SPhilipp Reisner idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 110231007745SPhilipp Reisner struct drbd_device *device = peer_device->device; 110331007745SPhilipp Reisner kref_get(&device->kref); 110431007745SPhilipp Reisner rcu_read_unlock(); 110513c76abaSPhilipp Reisner 110608b165baSPhilipp Reisner if (discard_my_data) 1107b30ab791SAndreas Gruenbacher set_bit(DISCARD_MY_DATA, &device->flags); 110808b165baSPhilipp Reisner else 1109b30ab791SAndreas Gruenbacher clear_bit(DISCARD_MY_DATA, &device->flags); 111008b165baSPhilipp Reisner 111169a22773SAndreas Gruenbacher drbd_connected(peer_device); 111205a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 1113c141ebdaSPhilipp Reisner rcu_read_lock(); 1114c141ebdaSPhilipp Reisner } 1115c141ebdaSPhilipp Reisner rcu_read_unlock(); 1116c141ebdaSPhilipp Reisner
/* NOTE(review): with STATE_SENT set and per-device flags updated, attempt
 * the transition to C_WF_REPORT_PARAMS; failure aborts this connect try. */
1117bde89a9eSAndreas Gruenbacher rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE); 1118bde89a9eSAndreas Gruenbacher if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) { 1119bde89a9eSAndreas Gruenbacher clear_bit(STATE_SENT, &connection->flags); 11201e86ac48SPhilipp Reisner return 0; 1121a1096a6eSPhilipp Reisner } 11221e86ac48SPhilipp Reisner 11231c03e520SPhilipp Reisner drbd_thread_start(&connection->ack_receiver); 112439e91a60SLars Ellenberg /* opencoded create_singlethread_workqueue(), 112539e91a60SLars Ellenberg * to be able to use format string arguments */ 112639e91a60SLars Ellenberg connection->ack_sender = 112739e91a60SLars Ellenberg alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name); 1128668700b4SPhilipp Reisner if (!connection->ack_sender) { 1129668700b4SPhilipp Reisner drbd_err(connection, "Failed to create workqueue ack_sender\n"); 1130668700b4SPhilipp Reisner return 0; 1131668700b4SPhilipp Reisner } 1132b411b363SPhilipp Reisner 11330500813fSAndreas Gruenbacher mutex_lock(&connection->resource->conf_update); 113408b165baSPhilipp Reisner /* The discard_my_data flag is a single-shot modifier to the next 113508b165baSPhilipp Reisner * connection attempt, the handshake of which is now well underway. 113608b165baSPhilipp Reisner * No need for rcu style copying of the whole struct 113708b165baSPhilipp Reisner * just to clear a single value.
*/ 1138bde89a9eSAndreas Gruenbacher connection->net_conf->discard_my_data = 0; 11390500813fSAndreas Gruenbacher mutex_unlock(&connection->resource->conf_update); 114008b165baSPhilipp Reisner 1141d3fcb490SPhilipp Reisner return h; 1142b411b363SPhilipp Reisner 1143b411b363SPhilipp Reisner out_release_sockets: 11447a426fd8SPhilipp Reisner if (ad.s_listen) 11457a426fd8SPhilipp Reisner sock_release(ad.s_listen); 11467da35862SPhilipp Reisner if (sock.socket) 11477da35862SPhilipp Reisner sock_release(sock.socket); 11487da35862SPhilipp Reisner if (msock.socket) 11497da35862SPhilipp Reisner sock_release(msock.socket); 1150b411b363SPhilipp Reisner return -1; 1151b411b363SPhilipp Reisner } 1152b411b363SPhilipp Reisner 1153bde89a9eSAndreas Gruenbacher static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi) 1154b411b363SPhilipp Reisner { 1155bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 1156b411b363SPhilipp Reisner 11570c8e36d9SAndreas Gruenbacher if (header_size == sizeof(struct p_header100) && 11580c8e36d9SAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { 11590c8e36d9SAndreas Gruenbacher struct p_header100 *h = header; 11600c8e36d9SAndreas Gruenbacher if (h->pad != 0) { 11611ec861ebSAndreas Gruenbacher drbd_err(connection, "Header padding is not zero\n"); 11620c8e36d9SAndreas Gruenbacher return -EINVAL; 116302918be2SPhilipp Reisner } 11640c8e36d9SAndreas Gruenbacher pi->vnr = be16_to_cpu(h->volume); 11650c8e36d9SAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 11660c8e36d9SAndreas Gruenbacher pi->size = be32_to_cpu(h->length); 11670c8e36d9SAndreas Gruenbacher } else if (header_size == sizeof(struct p_header95) && 1168e658983aSAndreas Gruenbacher *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { 1169e658983aSAndreas Gruenbacher struct p_header95 *h = header; 1170e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1171b55d84baSAndreas Gruenbacher pi->size
= be32_to_cpu(h->length); 1172eefc2f7dSPhilipp Reisner pi->vnr = 0; 1173e658983aSAndreas Gruenbacher } else if (header_size == sizeof(struct p_header80) && 1174e658983aSAndreas Gruenbacher *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { 1175e658983aSAndreas Gruenbacher struct p_header80 *h = header; 1176e658983aSAndreas Gruenbacher pi->cmd = be16_to_cpu(h->command); 1177e658983aSAndreas Gruenbacher pi->size = be16_to_cpu(h->length); 117877351055SPhilipp Reisner pi->vnr = 0; 117902918be2SPhilipp Reisner } else { 11801ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n", 1181e658983aSAndreas Gruenbacher be32_to_cpu(*(__be32 *)header), 1182bde89a9eSAndreas Gruenbacher connection->agreed_pro_version); 11838172f3e9SAndreas Gruenbacher return -EINVAL; 1184b411b363SPhilipp Reisner } 1185e658983aSAndreas Gruenbacher pi->data = header + header_size; 11868172f3e9SAndreas Gruenbacher return 0; 1187b411b363SPhilipp Reisner } 1188b411b363SPhilipp Reisner 1189c51a0ef3SLars Ellenberg static void drbd_unplug_all_devices(struct drbd_connection *connection) 1190c51a0ef3SLars Ellenberg { 1191c51a0ef3SLars Ellenberg if (current->plug == &connection->receiver_plug) { 1192c51a0ef3SLars Ellenberg blk_finish_plug(&connection->receiver_plug); 1193c51a0ef3SLars Ellenberg blk_start_plug(&connection->receiver_plug); 1194c51a0ef3SLars Ellenberg } /* else: maybe just schedule() ??
*/ 1195c51a0ef3SLars Ellenberg } 1196c51a0ef3SLars Ellenberg 1197bde89a9eSAndreas Gruenbacher static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi) 1198257d0af6SPhilipp Reisner { 1199bde89a9eSAndreas Gruenbacher void *buffer = connection->data.rbuf; 120069bc7bc3SAndreas Gruenbacher int err; 1201257d0af6SPhilipp Reisner 1202bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection)); 1203a5c31904SAndreas Gruenbacher if (err) 120469bc7bc3SAndreas Gruenbacher return err; 1205257d0af6SPhilipp Reisner 1206bde89a9eSAndreas Gruenbacher err = decode_header(connection, buffer, pi); 1207bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 1208b411b363SPhilipp Reisner 120969bc7bc3SAndreas Gruenbacher return err; 1210b411b363SPhilipp Reisner } 1211b411b363SPhilipp Reisner 1212c51a0ef3SLars Ellenberg static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi) 1213c51a0ef3SLars Ellenberg { 1214c51a0ef3SLars Ellenberg void *buffer = connection->data.rbuf; 1215c51a0ef3SLars Ellenberg unsigned int size = drbd_header_size(connection); 1216c51a0ef3SLars Ellenberg int err; 1217c51a0ef3SLars Ellenberg 1218c51a0ef3SLars Ellenberg err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT); 1219c51a0ef3SLars Ellenberg if (err != size) { 1220c51a0ef3SLars Ellenberg /* If we have nothing in the receive buffer now, to reduce 1221c51a0ef3SLars Ellenberg * application latency, try to drain the backend queues as 1222c51a0ef3SLars Ellenberg * quickly as possible, and let remote TCP know what we have 1223c51a0ef3SLars Ellenberg * received so far. 
*/ 1224c51a0ef3SLars Ellenberg if (err == -EAGAIN) { 1225c51a0ef3SLars Ellenberg drbd_tcp_quickack(connection->data.socket); 1226c51a0ef3SLars Ellenberg drbd_unplug_all_devices(connection); 1227c51a0ef3SLars Ellenberg } 1228c51a0ef3SLars Ellenberg if (err > 0) { 1229c51a0ef3SLars Ellenberg buffer += err; 1230c51a0ef3SLars Ellenberg size -= err; 1231c51a0ef3SLars Ellenberg } 1232c51a0ef3SLars Ellenberg err = drbd_recv_all_warn(connection, buffer, size); 1233c51a0ef3SLars Ellenberg if (err) 1234c51a0ef3SLars Ellenberg return err; 1235c51a0ef3SLars Ellenberg } 1236c51a0ef3SLars Ellenberg 1237c51a0ef3SLars Ellenberg err = decode_header(connection, connection->data.rbuf, pi); 1238c51a0ef3SLars Ellenberg connection->last_received = jiffies; 1239c51a0ef3SLars Ellenberg 1240c51a0ef3SLars Ellenberg return err; 1241c51a0ef3SLars Ellenberg } 1242f9ff0da5SLars Ellenberg /* This is blkdev_issue_flush, but asynchronous. 1243f9ff0da5SLars Ellenberg * We want to submit to all component volumes in parallel, 1244f9ff0da5SLars Ellenberg * then wait for all completions. 
1245f9ff0da5SLars Ellenberg */ 1246f9ff0da5SLars Ellenberg struct issue_flush_context { 1247f9ff0da5SLars Ellenberg atomic_t pending; 1248f9ff0da5SLars Ellenberg int error; 1249f9ff0da5SLars Ellenberg struct completion done; 1250f9ff0da5SLars Ellenberg }; 1251f9ff0da5SLars Ellenberg struct one_flush_context { 1252f9ff0da5SLars Ellenberg struct drbd_device *device; 1253f9ff0da5SLars Ellenberg struct issue_flush_context *ctx; 1254f9ff0da5SLars Ellenberg }; 1255f9ff0da5SLars Ellenberg 12561ffa7bfaSBaoyou Xie static void one_flush_endio(struct bio *bio) 1257f9ff0da5SLars Ellenberg { 1258f9ff0da5SLars Ellenberg struct one_flush_context *octx = bio->bi_private; 1259f9ff0da5SLars Ellenberg struct drbd_device *device = octx->device; 1260f9ff0da5SLars Ellenberg struct issue_flush_context *ctx = octx->ctx; 1261f9ff0da5SLars Ellenberg 12624e4cbee9SChristoph Hellwig if (bio->bi_status) { 12634e4cbee9SChristoph Hellwig ctx->error = blk_status_to_errno(bio->bi_status); 12644e4cbee9SChristoph Hellwig drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status); 1265f9ff0da5SLars Ellenberg } 1266f9ff0da5SLars Ellenberg kfree(octx); 1267f9ff0da5SLars Ellenberg bio_put(bio); 1268f9ff0da5SLars Ellenberg 1269f9ff0da5SLars Ellenberg clear_bit(FLUSH_PENDING, &device->flags); 1270f9ff0da5SLars Ellenberg put_ldev(device); 1271f9ff0da5SLars Ellenberg kref_put(&device->kref, drbd_destroy_device); 1272f9ff0da5SLars Ellenberg 1273f9ff0da5SLars Ellenberg if (atomic_dec_and_test(&ctx->pending)) 1274f9ff0da5SLars Ellenberg complete(&ctx->done); 1275f9ff0da5SLars Ellenberg } 1276f9ff0da5SLars Ellenberg 1277f9ff0da5SLars Ellenberg static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) 1278f9ff0da5SLars Ellenberg { 1279f9ff0da5SLars Ellenberg struct bio *bio = bio_alloc(GFP_NOIO, 0); 1280f9ff0da5SLars Ellenberg struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); 1281f9ff0da5SLars Ellenberg if (!bio || !octx) { 1282f9ff0da5SLars 
Ellenberg drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n"); 1283f9ff0da5SLars Ellenberg /* FIXME: what else can I do now? disconnecting or detaching 1284f9ff0da5SLars Ellenberg * really does not help to improve the state of the world, either. 1285f9ff0da5SLars Ellenberg */ 1286f9ff0da5SLars Ellenberg kfree(octx); 1287f9ff0da5SLars Ellenberg if (bio) 1288f9ff0da5SLars Ellenberg bio_put(bio); 1289f9ff0da5SLars Ellenberg 1290f9ff0da5SLars Ellenberg ctx->error = -ENOMEM; 1291f9ff0da5SLars Ellenberg put_ldev(device); 1292f9ff0da5SLars Ellenberg kref_put(&device->kref, drbd_destroy_device); 1293f9ff0da5SLars Ellenberg return; 1294f9ff0da5SLars Ellenberg } 1295f9ff0da5SLars Ellenberg 1296f9ff0da5SLars Ellenberg octx->device = device; 1297f9ff0da5SLars Ellenberg octx->ctx = ctx; 129874d46992SChristoph Hellwig bio_set_dev(bio, device->ldev->backing_bdev); 1299f9ff0da5SLars Ellenberg bio->bi_private = octx; 1300f9ff0da5SLars Ellenberg bio->bi_end_io = one_flush_endio; 130170fd7614SChristoph Hellwig bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; 1302f9ff0da5SLars Ellenberg 1303f9ff0da5SLars Ellenberg device->flush_jif = jiffies; 1304f9ff0da5SLars Ellenberg set_bit(FLUSH_PENDING, &device->flags); 1305f9ff0da5SLars Ellenberg atomic_inc(&ctx->pending); 1306f9ff0da5SLars Ellenberg submit_bio(bio); 1307f9ff0da5SLars Ellenberg } 1308f9ff0da5SLars Ellenberg 1309bde89a9eSAndreas Gruenbacher static void drbd_flush(struct drbd_connection *connection) 1310b411b363SPhilipp Reisner { 1311f9ff0da5SLars Ellenberg if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { 1312c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 1313f9ff0da5SLars Ellenberg struct issue_flush_context ctx; 13144b0007c0SPhilipp Reisner int vnr; 1315b411b363SPhilipp Reisner 1316f9ff0da5SLars Ellenberg atomic_set(&ctx.pending, 1); 1317f9ff0da5SLars Ellenberg ctx.error = 0; 1318f9ff0da5SLars Ellenberg init_completion(&ctx.done); 1319f9ff0da5SLars Ellenberg 1320615e087fSLars Ellenberg 
rcu_read_lock(); 1321c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1322c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1323c06ece6bSAndreas Gruenbacher 1324b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 1325615e087fSLars Ellenberg continue; 1326b30ab791SAndreas Gruenbacher kref_get(&device->kref); 1327615e087fSLars Ellenberg rcu_read_unlock(); 13284b0007c0SPhilipp Reisner 1329f9ff0da5SLars Ellenberg submit_one_flush(device, &ctx); 1330f9ff0da5SLars Ellenberg 1331f9ff0da5SLars Ellenberg rcu_read_lock(); 1332f9ff0da5SLars Ellenberg } 1333f9ff0da5SLars Ellenberg rcu_read_unlock(); 1334f9ff0da5SLars Ellenberg 1335f9ff0da5SLars Ellenberg /* Do we want to add a timeout, 1336f9ff0da5SLars Ellenberg * if disk-timeout is set? */ 1337f9ff0da5SLars Ellenberg if (!atomic_dec_and_test(&ctx.pending)) 1338f9ff0da5SLars Ellenberg wait_for_completion(&ctx.done); 1339f9ff0da5SLars Ellenberg 1340f9ff0da5SLars Ellenberg if (ctx.error) { 1341b411b363SPhilipp Reisner /* would rather check on EOPNOTSUPP, but that is not reliable. 1342b411b363SPhilipp Reisner * don't try again for ANY return value != 0 1343b411b363SPhilipp Reisner * if (rv == -EOPNOTSUPP) */ 1344f9ff0da5SLars Ellenberg /* Any error is already reported by bio_endio callback. */ 1345f6ba8636SAndreas Gruenbacher drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); 1346b411b363SPhilipp Reisner } 1347b411b363SPhilipp Reisner } 1348b411b363SPhilipp Reisner } 1349b411b363SPhilipp Reisner 1350b411b363SPhilipp Reisner /** 1351b411b363SPhilipp Reisner * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. 1352b30ab791SAndreas Gruenbacher * @device: DRBD device. 1353b411b363SPhilipp Reisner * @epoch: Epoch object. 1354b411b363SPhilipp Reisner * @ev: Epoch event. 
1355b411b363SPhilipp Reisner */ 1356bde89a9eSAndreas Gruenbacher static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection, 1357b411b363SPhilipp Reisner struct drbd_epoch *epoch, 1358b411b363SPhilipp Reisner enum epoch_event ev) 1359b411b363SPhilipp Reisner { 13602451fc3bSPhilipp Reisner int epoch_size; 1361b411b363SPhilipp Reisner struct drbd_epoch *next_epoch; 1362b411b363SPhilipp Reisner enum finish_epoch rv = FE_STILL_LIVE; 1363b411b363SPhilipp Reisner 1364bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock); 1365b411b363SPhilipp Reisner do { 1366b411b363SPhilipp Reisner next_epoch = NULL; 1367b411b363SPhilipp Reisner 1368b411b363SPhilipp Reisner epoch_size = atomic_read(&epoch->epoch_size); 1369b411b363SPhilipp Reisner 1370b411b363SPhilipp Reisner switch (ev & ~EV_CLEANUP) { 1371b411b363SPhilipp Reisner case EV_PUT: 1372b411b363SPhilipp Reisner atomic_dec(&epoch->active); 1373b411b363SPhilipp Reisner break; 1374b411b363SPhilipp Reisner case EV_GOT_BARRIER_NR: 1375b411b363SPhilipp Reisner set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); 1376b411b363SPhilipp Reisner break; 1377b411b363SPhilipp Reisner case EV_BECAME_LAST: 1378b411b363SPhilipp Reisner /* nothing to do*/ 1379b411b363SPhilipp Reisner break; 1380b411b363SPhilipp Reisner } 1381b411b363SPhilipp Reisner 1382b411b363SPhilipp Reisner if (epoch_size != 0 && 1383b411b363SPhilipp Reisner atomic_read(&epoch->active) == 0 && 138480f9fd55SPhilipp Reisner (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { 1385b411b363SPhilipp Reisner if (!(ev & EV_CLEANUP)) { 1386bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock); 1387bde89a9eSAndreas Gruenbacher drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size); 1388bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock); 1389b411b363SPhilipp Reisner } 13909ed57dcbSLars Ellenberg #if 0 13919ed57dcbSLars Ellenberg /* FIXME: dec unacked on connection, once we have 13929ed57dcbSLars 
Ellenberg * something to count pending connection packets in. */ 139380f9fd55SPhilipp Reisner if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) 1394bde89a9eSAndreas Gruenbacher dec_unacked(epoch->connection); 13959ed57dcbSLars Ellenberg #endif 1396b411b363SPhilipp Reisner 1397bde89a9eSAndreas Gruenbacher if (connection->current_epoch != epoch) { 1398b411b363SPhilipp Reisner next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); 1399b411b363SPhilipp Reisner list_del(&epoch->list); 1400b411b363SPhilipp Reisner ev = EV_BECAME_LAST | (ev & EV_CLEANUP); 1401bde89a9eSAndreas Gruenbacher connection->epochs--; 1402b411b363SPhilipp Reisner kfree(epoch); 1403b411b363SPhilipp Reisner 1404b411b363SPhilipp Reisner if (rv == FE_STILL_LIVE) 1405b411b363SPhilipp Reisner rv = FE_DESTROYED; 1406b411b363SPhilipp Reisner } else { 1407b411b363SPhilipp Reisner epoch->flags = 0; 1408b411b363SPhilipp Reisner atomic_set(&epoch->epoch_size, 0); 1409698f9315SUwe Kleine-König /* atomic_set(&epoch->active, 0); is already zero */ 1410b411b363SPhilipp Reisner if (rv == FE_STILL_LIVE) 1411b411b363SPhilipp Reisner rv = FE_RECYCLED; 1412b411b363SPhilipp Reisner } 1413b411b363SPhilipp Reisner } 1414b411b363SPhilipp Reisner 1415b411b363SPhilipp Reisner if (!next_epoch) 1416b411b363SPhilipp Reisner break; 1417b411b363SPhilipp Reisner 1418b411b363SPhilipp Reisner epoch = next_epoch; 1419b411b363SPhilipp Reisner } while (1); 1420b411b363SPhilipp Reisner 1421bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock); 1422b411b363SPhilipp Reisner 1423b411b363SPhilipp Reisner return rv; 1424b411b363SPhilipp Reisner } 1425b411b363SPhilipp Reisner 14268fe39aacSPhilipp Reisner static enum write_ordering_e 14278fe39aacSPhilipp Reisner max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) 14288fe39aacSPhilipp Reisner { 14298fe39aacSPhilipp Reisner struct disk_conf *dc; 14308fe39aacSPhilipp Reisner 14318fe39aacSPhilipp Reisner dc = rcu_dereference(bdev->disk_conf); 
14328fe39aacSPhilipp Reisner 1433f6ba8636SAndreas Gruenbacher if (wo == WO_BDEV_FLUSH && !dc->disk_flushes) 1434f6ba8636SAndreas Gruenbacher wo = WO_DRAIN_IO; 1435f6ba8636SAndreas Gruenbacher if (wo == WO_DRAIN_IO && !dc->disk_drain) 1436f6ba8636SAndreas Gruenbacher wo = WO_NONE; 14378fe39aacSPhilipp Reisner 14388fe39aacSPhilipp Reisner return wo; 14398fe39aacSPhilipp Reisner } 14408fe39aacSPhilipp Reisner 1441b411b363SPhilipp Reisner /** 1442b411b363SPhilipp Reisner * drbd_bump_write_ordering() - Fall back to an other write ordering method 1443bde89a9eSAndreas Gruenbacher * @connection: DRBD connection. 1444b411b363SPhilipp Reisner * @wo: Write ordering method to try. 1445b411b363SPhilipp Reisner */ 14468fe39aacSPhilipp Reisner void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, 14478fe39aacSPhilipp Reisner enum write_ordering_e wo) 1448b411b363SPhilipp Reisner { 1449e9526580SPhilipp Reisner struct drbd_device *device; 1450b411b363SPhilipp Reisner enum write_ordering_e pwo; 14514b0007c0SPhilipp Reisner int vnr; 1452b411b363SPhilipp Reisner static char *write_ordering_str[] = { 1453f6ba8636SAndreas Gruenbacher [WO_NONE] = "none", 1454f6ba8636SAndreas Gruenbacher [WO_DRAIN_IO] = "drain", 1455f6ba8636SAndreas Gruenbacher [WO_BDEV_FLUSH] = "flush", 1456b411b363SPhilipp Reisner }; 1457b411b363SPhilipp Reisner 1458e9526580SPhilipp Reisner pwo = resource->write_ordering; 1459f6ba8636SAndreas Gruenbacher if (wo != WO_BDEV_FLUSH) 1460b411b363SPhilipp Reisner wo = min(pwo, wo); 1461daeda1ccSPhilipp Reisner rcu_read_lock(); 1462e9526580SPhilipp Reisner idr_for_each_entry(&resource->devices, device, vnr) { 14638fe39aacSPhilipp Reisner if (get_ldev(device)) { 14648fe39aacSPhilipp Reisner wo = max_allowed_wo(device->ldev, wo); 14658fe39aacSPhilipp Reisner if (device->ldev == bdev) 14668fe39aacSPhilipp Reisner bdev = NULL; 1467b30ab791SAndreas Gruenbacher put_ldev(device); 14684b0007c0SPhilipp Reisner } 14698fe39aacSPhilipp Reisner } 
14708fe39aacSPhilipp Reisner 14718fe39aacSPhilipp Reisner if (bdev) 14728fe39aacSPhilipp Reisner wo = max_allowed_wo(bdev, wo); 14738fe39aacSPhilipp Reisner 147470df7092SLars Ellenberg rcu_read_unlock(); 147570df7092SLars Ellenberg 1476e9526580SPhilipp Reisner resource->write_ordering = wo; 1477f6ba8636SAndreas Gruenbacher if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH) 1478e9526580SPhilipp Reisner drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); 1479b411b363SPhilipp Reisner } 1480b411b363SPhilipp Reisner 1481f31e583aSLars Ellenberg /* 1482f31e583aSLars Ellenberg * Mapping "discard" to ZEROOUT with UNMAP does not work for us: 1483f31e583aSLars Ellenberg * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it 1484f31e583aSLars Ellenberg * will directly go to fallback mode, submitting normal writes, and 1485f31e583aSLars Ellenberg * never even try to UNMAP. 1486f31e583aSLars Ellenberg * 1487f31e583aSLars Ellenberg * And dm-thin does not do this (yet), mostly because in general it has 1488f31e583aSLars Ellenberg * to assume that "skip_block_zeroing" is set. See also: 1489f31e583aSLars Ellenberg * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html 1490f31e583aSLars Ellenberg * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html 1491f31e583aSLars Ellenberg * 1492f31e583aSLars Ellenberg * We *may* ignore the discard-zeroes-data setting, if so configured. 1493f31e583aSLars Ellenberg * 1494f31e583aSLars Ellenberg * Assumption is that this "discard_zeroes_data=0" is only because the backend 1495f31e583aSLars Ellenberg * may ignore partial unaligned discards. 
1496f31e583aSLars Ellenberg * 1497f31e583aSLars Ellenberg * LVM/DM thin as of at least 1498f31e583aSLars Ellenberg * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) 1499f31e583aSLars Ellenberg * Library version: 1.02.93-RHEL7 (2015-01-28) 1500f31e583aSLars Ellenberg * Driver version: 4.29.0 1501f31e583aSLars Ellenberg * still behaves this way. 1502f31e583aSLars Ellenberg * 1503f31e583aSLars Ellenberg * For unaligned (wrt. alignment and granularity) or too small discards, 1504f31e583aSLars Ellenberg * we zero-out the initial (and/or) trailing unaligned partial chunks, 1505f31e583aSLars Ellenberg * but discard all the aligned full chunks. 1506f31e583aSLars Ellenberg * 1507f31e583aSLars Ellenberg * At least for LVM/DM thin, with skip_block_zeroing=false, 1508f31e583aSLars Ellenberg * the result is effectively "discard_zeroes_data=1". 1509f31e583aSLars Ellenberg */ 1510f31e583aSLars Ellenberg /* flags: EE_TRIM|EE_ZEROOUT */ 1511f31e583aSLars Ellenberg int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags) 1512dd4f699dSLars Ellenberg { 15130dbed96aSChristoph Hellwig struct block_device *bdev = device->ldev->backing_bdev; 1514f31e583aSLars Ellenberg struct request_queue *q = bdev_get_queue(bdev); 1515f31e583aSLars Ellenberg sector_t tmp, nr; 1516f31e583aSLars Ellenberg unsigned int max_discard_sectors, granularity; 1517f31e583aSLars Ellenberg int alignment; 1518f31e583aSLars Ellenberg int err = 0; 1519dd4f699dSLars Ellenberg 1520f31e583aSLars Ellenberg if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM)) 1521f31e583aSLars Ellenberg goto zero_out; 1522f31e583aSLars Ellenberg 1523f31e583aSLars Ellenberg /* Zero-sector (unknown) and one-sector granularities are the same. 
*/ 1524f31e583aSLars Ellenberg granularity = max(q->limits.discard_granularity >> 9, 1U); 1525f31e583aSLars Ellenberg alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; 1526f31e583aSLars Ellenberg 1527f31e583aSLars Ellenberg max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); 1528f31e583aSLars Ellenberg max_discard_sectors -= max_discard_sectors % granularity; 1529f31e583aSLars Ellenberg if (unlikely(!max_discard_sectors)) 1530f31e583aSLars Ellenberg goto zero_out; 1531f31e583aSLars Ellenberg 1532f31e583aSLars Ellenberg if (nr_sectors < granularity) 1533f31e583aSLars Ellenberg goto zero_out; 1534f31e583aSLars Ellenberg 1535f31e583aSLars Ellenberg tmp = start; 1536f31e583aSLars Ellenberg if (sector_div(tmp, granularity) != alignment) { 1537f31e583aSLars Ellenberg if (nr_sectors < 2*granularity) 1538f31e583aSLars Ellenberg goto zero_out; 1539f31e583aSLars Ellenberg /* start + gran - (start + gran - align) % gran */ 1540f31e583aSLars Ellenberg tmp = start + granularity - alignment; 1541f31e583aSLars Ellenberg tmp = start + granularity - sector_div(tmp, granularity); 1542f31e583aSLars Ellenberg 1543f31e583aSLars Ellenberg nr = tmp - start; 1544f31e583aSLars Ellenberg /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many 1545f31e583aSLars Ellenberg * layers are below us, some may have smaller granularity */ 1546f31e583aSLars Ellenberg err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); 1547f31e583aSLars Ellenberg nr_sectors -= nr; 1548f31e583aSLars Ellenberg start = tmp; 1549f31e583aSLars Ellenberg } 1550f31e583aSLars Ellenberg while (nr_sectors >= max_discard_sectors) { 1551f31e583aSLars Ellenberg err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0); 1552f31e583aSLars Ellenberg nr_sectors -= max_discard_sectors; 1553f31e583aSLars Ellenberg start += max_discard_sectors; 1554f31e583aSLars Ellenberg } 1555f31e583aSLars Ellenberg if (nr_sectors) { 1556f31e583aSLars Ellenberg /* max_discard_sectors is unsigned 
int (and a multiple of 1557f31e583aSLars Ellenberg * granularity, we made sure of that above already); 1558f31e583aSLars Ellenberg * nr is < max_discard_sectors; 1559f31e583aSLars Ellenberg * I don't need sector_div here, even though nr is sector_t */ 1560f31e583aSLars Ellenberg nr = nr_sectors; 1561f31e583aSLars Ellenberg nr -= (unsigned int)nr % granularity; 1562f31e583aSLars Ellenberg if (nr) { 1563f31e583aSLars Ellenberg err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); 1564f31e583aSLars Ellenberg nr_sectors -= nr; 1565f31e583aSLars Ellenberg start += nr; 1566f31e583aSLars Ellenberg } 1567f31e583aSLars Ellenberg } 1568f31e583aSLars Ellenberg zero_out: 1569f31e583aSLars Ellenberg if (nr_sectors) { 1570f31e583aSLars Ellenberg err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 1571f31e583aSLars Ellenberg (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP); 1572f31e583aSLars Ellenberg } 1573f31e583aSLars Ellenberg return err != 0; 1574f31e583aSLars Ellenberg } 1575f31e583aSLars Ellenberg 1576f31e583aSLars Ellenberg static bool can_do_reliable_discards(struct drbd_device *device) 1577f31e583aSLars Ellenberg { 1578f31e583aSLars Ellenberg struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); 1579f31e583aSLars Ellenberg struct disk_conf *dc; 1580f31e583aSLars Ellenberg bool can_do; 1581f31e583aSLars Ellenberg 1582f31e583aSLars Ellenberg if (!blk_queue_discard(q)) 1583f31e583aSLars Ellenberg return false; 1584f31e583aSLars Ellenberg 1585f31e583aSLars Ellenberg rcu_read_lock(); 1586f31e583aSLars Ellenberg dc = rcu_dereference(device->ldev->disk_conf); 1587f31e583aSLars Ellenberg can_do = dc->discard_zeroes_if_aligned; 1588f31e583aSLars Ellenberg rcu_read_unlock(); 1589f31e583aSLars Ellenberg return can_do; 1590f31e583aSLars Ellenberg } 1591f31e583aSLars Ellenberg 1592f31e583aSLars Ellenberg static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req) 1593f31e583aSLars Ellenberg { 
1594f31e583aSLars Ellenberg /* If the backend cannot discard, or does not guarantee 1595f31e583aSLars Ellenberg * read-back zeroes in discarded ranges, we fall back to 1596f31e583aSLars Ellenberg * zero-out. Unless configuration specifically requested 1597f31e583aSLars Ellenberg * otherwise. */ 1598f31e583aSLars Ellenberg if (!can_do_reliable_discards(device)) 1599f31e583aSLars Ellenberg peer_req->flags |= EE_ZEROOUT; 1600f31e583aSLars Ellenberg 1601f31e583aSLars Ellenberg if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, 1602f31e583aSLars Ellenberg peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM))) 1603dd4f699dSLars Ellenberg peer_req->flags |= EE_WAS_ERROR; 1604dd4f699dSLars Ellenberg drbd_endio_write_sec_final(peer_req); 1605dd4f699dSLars Ellenberg } 1606dd4f699dSLars Ellenberg 16079104d31aSLars Ellenberg static void drbd_issue_peer_wsame(struct drbd_device *device, 16089104d31aSLars Ellenberg struct drbd_peer_request *peer_req) 16099104d31aSLars Ellenberg { 16109104d31aSLars Ellenberg struct block_device *bdev = device->ldev->backing_bdev; 16119104d31aSLars Ellenberg sector_t s = peer_req->i.sector; 16129104d31aSLars Ellenberg sector_t nr = peer_req->i.size >> 9; 16139104d31aSLars Ellenberg if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages)) 16149104d31aSLars Ellenberg peer_req->flags |= EE_WAS_ERROR; 16159104d31aSLars Ellenberg drbd_endio_write_sec_final(peer_req); 16169104d31aSLars Ellenberg } 16179104d31aSLars Ellenberg 16189104d31aSLars Ellenberg 1619b411b363SPhilipp Reisner /** 1620fbe29decSAndreas Gruenbacher * drbd_submit_peer_request() 1621b30ab791SAndreas Gruenbacher * @device: DRBD device. 1622db830c46SAndreas Gruenbacher * @peer_req: peer request 16231eff9d32SJens Axboe * @rw: flag field, see bio->bi_opf 162410f6d992SLars Ellenberg * 162510f6d992SLars Ellenberg * May spread the pages to multiple bios, 162610f6d992SLars Ellenberg * depending on bio_add_page restrictions. 
162710f6d992SLars Ellenberg * 162810f6d992SLars Ellenberg * Returns 0 if all bios have been submitted, 162910f6d992SLars Ellenberg * -ENOMEM if we could not allocate enough bios, 163010f6d992SLars Ellenberg * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a 163110f6d992SLars Ellenberg * single page to an empty bio (which should never happen and likely indicates 163210f6d992SLars Ellenberg * that the lower level IO stack is in some way broken). This has been observed 163310f6d992SLars Ellenberg * on certain Xen deployments. 163445bb912bSLars Ellenberg */ 163545bb912bSLars Ellenberg /* TODO allocate from our own bio_set. */ 1636b30ab791SAndreas Gruenbacher int drbd_submit_peer_request(struct drbd_device *device, 1637fbe29decSAndreas Gruenbacher struct drbd_peer_request *peer_req, 1638bb3cc85eSMike Christie const unsigned op, const unsigned op_flags, 1639bb3cc85eSMike Christie const int fault_type) 164045bb912bSLars Ellenberg { 164145bb912bSLars Ellenberg struct bio *bios = NULL; 164245bb912bSLars Ellenberg struct bio *bio; 1643db830c46SAndreas Gruenbacher struct page *page = peer_req->pages; 1644db830c46SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 164511f8b2b6SAndreas Gruenbacher unsigned data_size = peer_req->i.size; 164645bb912bSLars Ellenberg unsigned n_bios = 0; 164711f8b2b6SAndreas Gruenbacher unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; 164810f6d992SLars Ellenberg int err = -ENOMEM; 164945bb912bSLars Ellenberg 1650dd4f699dSLars Ellenberg /* TRIM/DISCARD: for now, always use the helper function 1651dd4f699dSLars Ellenberg * blkdev_issue_zeroout(..., discard=true). 1652dd4f699dSLars Ellenberg * It's synchronous, but it does the right thing wrt. bio splitting. 1653dd4f699dSLars Ellenberg * Correctness first, performance later. Next step is to code an 1654dd4f699dSLars Ellenberg * asynchronous variant of the same. 
1655dd4f699dSLars Ellenberg */ 1656f31e583aSLars Ellenberg if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) { 1657a0fb3c47SLars Ellenberg /* wait for all pending IO completions, before we start 1658a0fb3c47SLars Ellenberg * zeroing things out. */ 16595dd2ca19SAndreas Gruenbacher conn_wait_active_ee_empty(peer_req->peer_device->connection); 166045d2933cSLars Ellenberg /* add it to the active list now, 166145d2933cSLars Ellenberg * so we can find it to present it in debugfs */ 166221ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies; 166321ae5d7fSLars Ellenberg peer_req->flags |= EE_SUBMITTED; 1664700ca8c0SPhilipp Reisner 1665700ca8c0SPhilipp Reisner /* If this was a resync request from receive_rs_deallocated(), 1666700ca8c0SPhilipp Reisner * it is already on the sync_ee list */ 1667700ca8c0SPhilipp Reisner if (list_empty(&peer_req->w.list)) { 166845d2933cSLars Ellenberg spin_lock_irq(&device->resource->req_lock); 166945d2933cSLars Ellenberg list_add_tail(&peer_req->w.list, &device->active_ee); 167045d2933cSLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 1671700ca8c0SPhilipp Reisner } 1672700ca8c0SPhilipp Reisner 1673f31e583aSLars Ellenberg if (peer_req->flags & (EE_TRIM|EE_ZEROOUT)) 1674f31e583aSLars Ellenberg drbd_issue_peer_discard_or_zero_out(device, peer_req); 16759104d31aSLars Ellenberg else /* EE_WRITE_SAME */ 16769104d31aSLars Ellenberg drbd_issue_peer_wsame(device, peer_req); 1677a0fb3c47SLars Ellenberg return 0; 1678a0fb3c47SLars Ellenberg } 1679a0fb3c47SLars Ellenberg 168045bb912bSLars Ellenberg /* In most cases, we will only need one bio. But in case the lower 168145bb912bSLars Ellenberg * level restrictions happen to be different at this offset on this 168245bb912bSLars Ellenberg * side than those of the sending peer, we may need to submit the 16839476f39dSLars Ellenberg * request in more than one bio. 
16849476f39dSLars Ellenberg * 16859476f39dSLars Ellenberg * Plain bio_alloc is good enough here, this is no DRBD internally 16869476f39dSLars Ellenberg * generated bio, but a bio allocated on behalf of the peer. 16879476f39dSLars Ellenberg */ 168845bb912bSLars Ellenberg next_bio: 168945bb912bSLars Ellenberg bio = bio_alloc(GFP_NOIO, nr_pages); 169045bb912bSLars Ellenberg if (!bio) { 1691a0fb3c47SLars Ellenberg drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages); 169245bb912bSLars Ellenberg goto fail; 169345bb912bSLars Ellenberg } 1694db830c46SAndreas Gruenbacher /* > peer_req->i.sector, unless this is the first bio */ 16954f024f37SKent Overstreet bio->bi_iter.bi_sector = sector; 169674d46992SChristoph Hellwig bio_set_dev(bio, device->ldev->backing_bdev); 1697bb3cc85eSMike Christie bio_set_op_attrs(bio, op, op_flags); 1698db830c46SAndreas Gruenbacher bio->bi_private = peer_req; 1699fcefa62eSAndreas Gruenbacher bio->bi_end_io = drbd_peer_request_endio; 170045bb912bSLars Ellenberg 170145bb912bSLars Ellenberg bio->bi_next = bios; 170245bb912bSLars Ellenberg bios = bio; 170345bb912bSLars Ellenberg ++n_bios; 170445bb912bSLars Ellenberg 170545bb912bSLars Ellenberg page_chain_for_each(page) { 170611f8b2b6SAndreas Gruenbacher unsigned len = min_t(unsigned, data_size, PAGE_SIZE); 170706efffdaSMing Lei if (!bio_add_page(bio, page, len, 0)) 170845bb912bSLars Ellenberg goto next_bio; 170911f8b2b6SAndreas Gruenbacher data_size -= len; 171045bb912bSLars Ellenberg sector += len >> 9; 171145bb912bSLars Ellenberg --nr_pages; 171245bb912bSLars Ellenberg } 171311f8b2b6SAndreas Gruenbacher D_ASSERT(device, data_size == 0); 1714a0fb3c47SLars Ellenberg D_ASSERT(device, page == NULL); 171545bb912bSLars Ellenberg 1716db830c46SAndreas Gruenbacher atomic_set(&peer_req->pending_bios, n_bios); 171721ae5d7fSLars Ellenberg /* for debugfs: update timestamp, mark as submitted */ 171821ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies; 171921ae5d7fSLars Ellenberg 
peer_req->flags |= EE_SUBMITTED; 172045bb912bSLars Ellenberg do { 172145bb912bSLars Ellenberg bio = bios; 172245bb912bSLars Ellenberg bios = bios->bi_next; 172345bb912bSLars Ellenberg bio->bi_next = NULL; 172445bb912bSLars Ellenberg 1725b30ab791SAndreas Gruenbacher drbd_generic_make_request(device, fault_type, bio); 172645bb912bSLars Ellenberg } while (bios); 172745bb912bSLars Ellenberg return 0; 172845bb912bSLars Ellenberg 172945bb912bSLars Ellenberg fail: 173045bb912bSLars Ellenberg while (bios) { 173145bb912bSLars Ellenberg bio = bios; 173245bb912bSLars Ellenberg bios = bios->bi_next; 173345bb912bSLars Ellenberg bio_put(bio); 173445bb912bSLars Ellenberg } 173510f6d992SLars Ellenberg return err; 173645bb912bSLars Ellenberg } 173745bb912bSLars Ellenberg 1738b30ab791SAndreas Gruenbacher static void drbd_remove_epoch_entry_interval(struct drbd_device *device, 1739db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req) 174053840641SAndreas Gruenbacher { 1741db830c46SAndreas Gruenbacher struct drbd_interval *i = &peer_req->i; 174253840641SAndreas Gruenbacher 1743b30ab791SAndreas Gruenbacher drbd_remove_interval(&device->write_requests, i); 174453840641SAndreas Gruenbacher drbd_clear_interval(i); 174553840641SAndreas Gruenbacher 17466c852becSAndreas Gruenbacher /* Wake up any processes waiting for this peer request to complete. 
*/ 174753840641SAndreas Gruenbacher if (i->waiting) 1748b30ab791SAndreas Gruenbacher wake_up(&device->misc_wait); 174953840641SAndreas Gruenbacher } 175053840641SAndreas Gruenbacher 1751bde89a9eSAndreas Gruenbacher static void conn_wait_active_ee_empty(struct drbd_connection *connection) 175277fede51SPhilipp Reisner { 1753c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 175477fede51SPhilipp Reisner int vnr; 175577fede51SPhilipp Reisner 175677fede51SPhilipp Reisner rcu_read_lock(); 1757c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1758c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 1759c06ece6bSAndreas Gruenbacher 1760b30ab791SAndreas Gruenbacher kref_get(&device->kref); 176177fede51SPhilipp Reisner rcu_read_unlock(); 1762b30ab791SAndreas Gruenbacher drbd_wait_ee_list_empty(device, &device->active_ee); 176305a10ec7SAndreas Gruenbacher kref_put(&device->kref, drbd_destroy_device); 176477fede51SPhilipp Reisner rcu_read_lock(); 176577fede51SPhilipp Reisner } 176677fede51SPhilipp Reisner rcu_read_unlock(); 176777fede51SPhilipp Reisner } 176877fede51SPhilipp Reisner 1769bde89a9eSAndreas Gruenbacher static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) 1770b411b363SPhilipp Reisner { 17712451fc3bSPhilipp Reisner int rv; 1772e658983aSAndreas Gruenbacher struct p_barrier *p = pi->data; 1773b411b363SPhilipp Reisner struct drbd_epoch *epoch; 1774b411b363SPhilipp Reisner 17759ed57dcbSLars Ellenberg /* FIXME these are unacked on connection, 17769ed57dcbSLars Ellenberg * not a specific (peer)device. 
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		/* Drain all still-active writes and flush the backing
		 * devices before a new epoch may start. */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* current_epoch is empty (or the allocation above failed);
		 * keep using the current epoch, nothing more to do. */
		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* Initialize the freshly allocated epoch and make it current. */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

/* quick wrapper in case payload size != request_size (write same) */
static void drbd_csum_ee_size(struct crypto_shash *h,
			      struct drbd_peer_request *r, void *d,
			      unsigned int payload_size)
{
	/* Temporarily override the interval size so drbd_csum_ee() only
	 * covers payload_size bytes; restore it afterwards. */
	unsigned int tmp = r->i.size;
	r->i.size = payload_size;
	drbd_csum_ee(h, r, d);
	r->i.size = tmp;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 * data_size: actual payload ("data in")
 *	for normal writes that is bi_size.
 *	for discards, that is zero.
 *	for write same, it is logical_block_size.
 * both trim and write same have the bi_size ("data len to be affected")
 * as extra argument in the packet header.
 */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;

	/* If an integrity algorithm is configured, the digest precedes the
	 * payload on the wire; read it first and shrink data_size. */
	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim and wsame. */
	ds = data_size;
	if (trim) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (zeroes) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(zeroes->size);
	} else if (wsame) {
		/* write-same payload must be exactly one logical block,
		 * both for our queue and for the backing device */
		if (data_size != queue_logical_block_size(device->rq_queue)) {
			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
				data_size, queue_logical_block_size(device->rq_queue));
			return NULL;
		}
		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
			return NULL;
		}
		ds = be32_to_cpu(wsame->size);
	}

	/* sanity-check the affected size before allocating anything */
	if (!expect(IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || wsame || zeroes) {
		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	/* trim and write-zeroes carry no payload: we are done here */
	if (trim) {
		peer_req->flags |= EE_TRIM;
		return peer_req;
	}
	if (zeroes) {
		peer_req->flags |= EE_ZEROOUT;
		return peer_req;
	}
	if (wsame)
		peer_req->flags |= EE_WRITE_SAME;

	/* receive payload size bytes into page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			/* fault injection: flip all bits of the first word */
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the digest read above */
	if (digest_size) {
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	/* one scratch page, reused for each PAGE_SIZE chunk drained */
	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

/* Receive a "diskless read" reply: copy the payload straight into the
 * bios of the request @req we originally sent the read for, verifying
 * the integrity digest if one is configured.
 * Returns 0 on success, a negative error otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* the digest, if any, precedes the payload on the wire */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill each bio segment directly from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	/* the whole payload must have been consumed by the bio above */
	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* resync write succeeded: mark the area in sync and ack it */
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}

/* Receive a resync data block and submit it to the local disk.
 * On success the write completes asynchronously; if submission fails we
 * trigger a reconnect.  The caller's get_ldev() reference is released
 * here on the error path (__releases(local)). */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* undo the list_add_tail above before freeing the request */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

/* Look up the pending drbd_request the peer refers to by @id, validating
 * that it really lives in @root and covers @sector. */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64
	     id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	/* the peer echoes our pointer back as block_id; verify it is still
	 * a live request in @root before trusting it */
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

/* Handle P_DATA_REPLY: the peer answers one of our reads; copy the
 * payload into the original request's bio. */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}

/* Handle P_RS_DATA_REPLY: a resync data block sent by the sync source. */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		/* no usable local disk: drain the payload from the socket
		 * and answer with a negative ack */
		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}

/* Requeue (via the retry machinery) all local write requests overlapping
 * [sector, sector+size) that were postponed due to a write conflict. */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* only requests that are postponed and no longer pending
		 * locally may be restarted */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}

/*
 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during a resync we ack with P_RS_WRITE_ACK and may
			 * also mark the area in sync */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this?
			 */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}

/* Send @ack for @peer_req and drop the unacked count. */
static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}

static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}

static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	/* peers before protocol version 100 are sent P_SUPERSEDED instead
	 * of P_RETRY_WRITE */
	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}

/* Return true iff sequence number @a is newer than @b under
 * wrap-around arithmetic. */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 * a <<= 8; b <<= 8;
	 */
	return (s32)a - (s32)b > 0;
}

/* Return the newer of two sequence numbers (wrap-around aware). */
static u32 seq_max(u32 a, u32 b)
{
	return seq_greater(a, b) ? a : b;
}

/* Record the highest peer sequence number seen so far.  Only relevant
 * when we have to resolve write conflicts (RESOLVE_CONFLICTS set). */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}

/* Do the sector ranges [s1, s1 + l1 bytes) and [s2, s2 + l2 bytes)
 * overlap?  Lengths are in bytes, sectors are 512-byte units. */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
}

/* maybe change sync_ee into interval trees as well? */
static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	struct drbd_peer_request *rs_req;
	bool rv = false;

	/* linear scan of the pending resync writes for an overlap with
	 * this incoming write (see question above about interval trees) */
	spin_lock_irq(&device->resource->req_lock);
	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
		if (overlaps(peer_req->i.sector, peer_req->i.size,
			     rs_req->i.sector, rs_req->i.size)) {
			rv = true;
			break;
		}
	}
	spin_unlock_irq(&device->resource->req_lock);

	return rv;
}

/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
2373b411b363SPhilipp Reisner * 2374b411b363SPhilipp Reisner * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, 2375b411b363SPhilipp Reisner * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds 2376b411b363SPhilipp Reisner * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have 2377b411b363SPhilipp Reisner * 1<<9 == 512 seconds aka ages for the 32bit wrap around... 2378b411b363SPhilipp Reisner * 2379b411b363SPhilipp Reisner * returns 0 if we may process the packet, 2380b411b363SPhilipp Reisner * -ERESTARTSYS if we were interrupted (by disconnect signal). */ 238169a22773SAndreas Gruenbacher static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq) 2382b411b363SPhilipp Reisner { 238369a22773SAndreas Gruenbacher struct drbd_device *device = peer_device->device; 2384b411b363SPhilipp Reisner DEFINE_WAIT(wait); 2385b411b363SPhilipp Reisner long timeout; 2386b874d231SPhilipp Reisner int ret = 0, tp; 23877be8da07SAndreas Gruenbacher 238869a22773SAndreas Gruenbacher if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) 23897be8da07SAndreas Gruenbacher return 0; 23907be8da07SAndreas Gruenbacher 2391b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 2392b411b363SPhilipp Reisner for (;;) { 2393b30ab791SAndreas Gruenbacher if (!seq_greater(peer_seq - 1, device->peer_seq)) { 2394b30ab791SAndreas Gruenbacher device->peer_seq = seq_max(device->peer_seq, peer_seq); 2395b411b363SPhilipp Reisner break; 23967be8da07SAndreas Gruenbacher } 2397b874d231SPhilipp Reisner 2398b411b363SPhilipp Reisner if (signal_pending(current)) { 2399b411b363SPhilipp Reisner ret = -ERESTARTSYS; 2400b411b363SPhilipp Reisner break; 2401b411b363SPhilipp Reisner } 2402b874d231SPhilipp Reisner 2403b874d231SPhilipp Reisner rcu_read_lock(); 24045dd2ca19SAndreas Gruenbacher tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; 2405b874d231SPhilipp Reisner 
rcu_read_unlock(); 2406b874d231SPhilipp Reisner 2407b874d231SPhilipp Reisner if (!tp) 2408b874d231SPhilipp Reisner break; 2409b874d231SPhilipp Reisner 2410b874d231SPhilipp Reisner /* Only need to wait if two_primaries is enabled */ 2411b30ab791SAndreas Gruenbacher prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE); 2412b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 241344ed167dSPhilipp Reisner rcu_read_lock(); 241469a22773SAndreas Gruenbacher timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10; 241544ed167dSPhilipp Reisner rcu_read_unlock(); 241671b1c1ebSAndreas Gruenbacher timeout = schedule_timeout(timeout); 2417b30ab791SAndreas Gruenbacher spin_lock(&device->peer_seq_lock); 24187be8da07SAndreas Gruenbacher if (!timeout) { 2419b411b363SPhilipp Reisner ret = -ETIMEDOUT; 2420d0180171SAndreas Gruenbacher drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n"); 2421b411b363SPhilipp Reisner break; 2422b411b363SPhilipp Reisner } 2423b411b363SPhilipp Reisner } 2424b30ab791SAndreas Gruenbacher spin_unlock(&device->peer_seq_lock); 2425b30ab791SAndreas Gruenbacher finish_wait(&device->seq_wait, &wait); 2426b411b363SPhilipp Reisner return ret; 2427b411b363SPhilipp Reisner } 2428b411b363SPhilipp Reisner 2429688593c5SLars Ellenberg /* see also bio_flags_to_wire() 2430688593c5SLars Ellenberg * DRBD_REQ_*, because we need to semantically map the flags to data packet 2431688593c5SLars Ellenberg * flags and back. We may replicate to other kernel versions. */ 2432bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_flags(u32 dpf) 243376d2e7ecSPhilipp Reisner { 243476d2e7ecSPhilipp Reisner return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | 243576d2e7ecSPhilipp Reisner (dpf & DP_FUA ? REQ_FUA : 0) | 243628a8f0d3SMike Christie (dpf & DP_FLUSH ? 
REQ_PREFLUSH : 0); 2437bb3cc85eSMike Christie } 2438bb3cc85eSMike Christie 2439bb3cc85eSMike Christie static unsigned long wire_flags_to_bio_op(u32 dpf) 2440bb3cc85eSMike Christie { 2441f31e583aSLars Ellenberg if (dpf & DP_ZEROES) 244245c21793SChristoph Hellwig return REQ_OP_WRITE_ZEROES; 2443f31e583aSLars Ellenberg if (dpf & DP_DISCARD) 2444f31e583aSLars Ellenberg return REQ_OP_DISCARD; 2445f31e583aSLars Ellenberg if (dpf & DP_WSAME) 2446f31e583aSLars Ellenberg return REQ_OP_WRITE_SAME; 2447bb3cc85eSMike Christie else 2448bb3cc85eSMike Christie return REQ_OP_WRITE; 244976d2e7ecSPhilipp Reisner } 245076d2e7ecSPhilipp Reisner 2451b30ab791SAndreas Gruenbacher static void fail_postponed_requests(struct drbd_device *device, sector_t sector, 24527be8da07SAndreas Gruenbacher unsigned int size) 2453b411b363SPhilipp Reisner { 24547be8da07SAndreas Gruenbacher struct drbd_interval *i; 24557be8da07SAndreas Gruenbacher 24567be8da07SAndreas Gruenbacher repeat: 2457b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 24587be8da07SAndreas Gruenbacher struct drbd_request *req; 24597be8da07SAndreas Gruenbacher struct bio_and_error m; 24607be8da07SAndreas Gruenbacher 24617be8da07SAndreas Gruenbacher if (!i->local) 24627be8da07SAndreas Gruenbacher continue; 24637be8da07SAndreas Gruenbacher req = container_of(i, struct drbd_request, i); 24647be8da07SAndreas Gruenbacher if (!(req->rq_state & RQ_POSTPONED)) 24657be8da07SAndreas Gruenbacher continue; 24667be8da07SAndreas Gruenbacher req->rq_state &= ~RQ_POSTPONED; 24677be8da07SAndreas Gruenbacher __req_mod(req, NEG_ACKED, &m); 24680500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 24697be8da07SAndreas Gruenbacher if (m.bio) 2470b30ab791SAndreas Gruenbacher complete_master_bio(device, &m); 24710500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 24727be8da07SAndreas Gruenbacher goto repeat; 24737be8da07SAndreas Gruenbacher } 24747be8da07SAndreas 
Gruenbacher } 24757be8da07SAndreas Gruenbacher 2476b30ab791SAndreas Gruenbacher static int handle_write_conflicts(struct drbd_device *device, 24777be8da07SAndreas Gruenbacher struct drbd_peer_request *peer_req) 24787be8da07SAndreas Gruenbacher { 2479e33b32deSAndreas Gruenbacher struct drbd_connection *connection = peer_req->peer_device->connection; 2480bde89a9eSAndreas Gruenbacher bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags); 24817be8da07SAndreas Gruenbacher sector_t sector = peer_req->i.sector; 24827be8da07SAndreas Gruenbacher const unsigned int size = peer_req->i.size; 24837be8da07SAndreas Gruenbacher struct drbd_interval *i; 24847be8da07SAndreas Gruenbacher bool equal; 24857be8da07SAndreas Gruenbacher int err; 24867be8da07SAndreas Gruenbacher 24877be8da07SAndreas Gruenbacher /* 24887be8da07SAndreas Gruenbacher * Inserting the peer request into the write_requests tree will prevent 24897be8da07SAndreas Gruenbacher * new conflicting local requests from being added. 24907be8da07SAndreas Gruenbacher */ 2491b30ab791SAndreas Gruenbacher drbd_insert_interval(&device->write_requests, &peer_req->i); 24927be8da07SAndreas Gruenbacher 24937be8da07SAndreas Gruenbacher repeat: 2494b30ab791SAndreas Gruenbacher drbd_for_each_overlap(i, &device->write_requests, sector, size) { 24957be8da07SAndreas Gruenbacher if (i == &peer_req->i) 24967be8da07SAndreas Gruenbacher continue; 249708d0dabfSLars Ellenberg if (i->completed) 249808d0dabfSLars Ellenberg continue; 24997be8da07SAndreas Gruenbacher 25007be8da07SAndreas Gruenbacher if (!i->local) { 25017be8da07SAndreas Gruenbacher /* 25027be8da07SAndreas Gruenbacher * Our peer has sent a conflicting remote request; this 25037be8da07SAndreas Gruenbacher * should not happen in a two-node setup. Wait for the 25047be8da07SAndreas Gruenbacher * earlier peer request to complete. 
25057be8da07SAndreas Gruenbacher */ 2506b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, i); 25077be8da07SAndreas Gruenbacher if (err) 25087be8da07SAndreas Gruenbacher goto out; 25097be8da07SAndreas Gruenbacher goto repeat; 25107be8da07SAndreas Gruenbacher } 25117be8da07SAndreas Gruenbacher 25127be8da07SAndreas Gruenbacher equal = i->sector == sector && i->size == size; 25137be8da07SAndreas Gruenbacher if (resolve_conflicts) { 25147be8da07SAndreas Gruenbacher /* 25157be8da07SAndreas Gruenbacher * If the peer request is fully contained within the 2516d4dabbe2SLars Ellenberg * overlapping request, it can be considered overwritten 2517d4dabbe2SLars Ellenberg * and thus superseded; otherwise, it will be retried 2518d4dabbe2SLars Ellenberg * once all overlapping requests have completed. 25197be8da07SAndreas Gruenbacher */ 2520d4dabbe2SLars Ellenberg bool superseded = i->sector <= sector && i->sector + 25217be8da07SAndreas Gruenbacher (i->size >> 9) >= sector + (size >> 9); 25227be8da07SAndreas Gruenbacher 25237be8da07SAndreas Gruenbacher if (!equal) 2524d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 25257be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u, " 25267be8da07SAndreas Gruenbacher "assuming %s came first\n", 25277be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 25287be8da07SAndreas Gruenbacher (unsigned long long)sector, size, 2529d4dabbe2SLars Ellenberg superseded ? "local" : "remote"); 25307be8da07SAndreas Gruenbacher 2531a8cd15baSAndreas Gruenbacher peer_req->w.cb = superseded ? 
e_send_superseded : 25327be8da07SAndreas Gruenbacher e_send_retry_write; 2533a8cd15baSAndreas Gruenbacher list_add_tail(&peer_req->w.list, &device->done_ee); 2534668700b4SPhilipp Reisner queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work); 25357be8da07SAndreas Gruenbacher 25367be8da07SAndreas Gruenbacher err = -ENOENT; 25377be8da07SAndreas Gruenbacher goto out; 25387be8da07SAndreas Gruenbacher } else { 25397be8da07SAndreas Gruenbacher struct drbd_request *req = 25407be8da07SAndreas Gruenbacher container_of(i, struct drbd_request, i); 25417be8da07SAndreas Gruenbacher 25427be8da07SAndreas Gruenbacher if (!equal) 2543d0180171SAndreas Gruenbacher drbd_alert(device, "Concurrent writes detected: " 25447be8da07SAndreas Gruenbacher "local=%llus +%u, remote=%llus +%u\n", 25457be8da07SAndreas Gruenbacher (unsigned long long)i->sector, i->size, 25467be8da07SAndreas Gruenbacher (unsigned long long)sector, size); 25477be8da07SAndreas Gruenbacher 25487be8da07SAndreas Gruenbacher if (req->rq_state & RQ_LOCAL_PENDING || 25497be8da07SAndreas Gruenbacher !(req->rq_state & RQ_POSTPONED)) { 25507be8da07SAndreas Gruenbacher /* 25517be8da07SAndreas Gruenbacher * Wait for the node with the discard flag to 2552d4dabbe2SLars Ellenberg * decide if this request has been superseded 2553d4dabbe2SLars Ellenberg * or needs to be retried. 2554d4dabbe2SLars Ellenberg * Requests that have been superseded will 25557be8da07SAndreas Gruenbacher * disappear from the write_requests tree. 25567be8da07SAndreas Gruenbacher * 25577be8da07SAndreas Gruenbacher * In addition, wait for the conflicting 25587be8da07SAndreas Gruenbacher * request to finish locally before submitting 25597be8da07SAndreas Gruenbacher * the conflicting peer request. 
25607be8da07SAndreas Gruenbacher */ 2561b30ab791SAndreas Gruenbacher err = drbd_wait_misc(device, &req->i); 25627be8da07SAndreas Gruenbacher if (err) { 2563e33b32deSAndreas Gruenbacher _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD); 2564b30ab791SAndreas Gruenbacher fail_postponed_requests(device, sector, size); 25657be8da07SAndreas Gruenbacher goto out; 25667be8da07SAndreas Gruenbacher } 25677be8da07SAndreas Gruenbacher goto repeat; 25687be8da07SAndreas Gruenbacher } 25697be8da07SAndreas Gruenbacher /* 25707be8da07SAndreas Gruenbacher * Remember to restart the conflicting requests after 25717be8da07SAndreas Gruenbacher * the new peer request has completed. 25727be8da07SAndreas Gruenbacher */ 25737be8da07SAndreas Gruenbacher peer_req->flags |= EE_RESTART_REQUESTS; 25747be8da07SAndreas Gruenbacher } 25757be8da07SAndreas Gruenbacher } 25767be8da07SAndreas Gruenbacher err = 0; 25777be8da07SAndreas Gruenbacher 25787be8da07SAndreas Gruenbacher out: 25797be8da07SAndreas Gruenbacher if (err) 2580b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 25817be8da07SAndreas Gruenbacher return err; 25827be8da07SAndreas Gruenbacher } 25837be8da07SAndreas Gruenbacher 2584b411b363SPhilipp Reisner /* mirrored write */ 2585bde89a9eSAndreas Gruenbacher static int receive_Data(struct drbd_connection *connection, struct packet_info *pi) 2586b411b363SPhilipp Reisner { 25879f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2588b30ab791SAndreas Gruenbacher struct drbd_device *device; 258921ae5d7fSLars Ellenberg struct net_conf *nc; 2590b411b363SPhilipp Reisner sector_t sector; 2591db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 2592e658983aSAndreas Gruenbacher struct p_data *p = pi->data; 25937be8da07SAndreas Gruenbacher u32 peer_seq = be32_to_cpu(p->seq_num); 2594bb3cc85eSMike Christie int op, op_flags; 2595b411b363SPhilipp Reisner u32 dp_flags; 2596302bdeaeSPhilipp Reisner int err, tp; 25977be8da07SAndreas 
Gruenbacher 25989f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 25999f4fe9adSAndreas Gruenbacher if (!peer_device) 26004a76b161SAndreas Gruenbacher return -EIO; 26019f4fe9adSAndreas Gruenbacher device = peer_device->device; 2602b411b363SPhilipp Reisner 2603b30ab791SAndreas Gruenbacher if (!get_ldev(device)) { 260482bc0194SAndreas Gruenbacher int err2; 2605b411b363SPhilipp Reisner 260669a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 260769a22773SAndreas Gruenbacher drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size); 2608bde89a9eSAndreas Gruenbacher atomic_inc(&connection->current_epoch->epoch_size); 260969a22773SAndreas Gruenbacher err2 = drbd_drain_block(peer_device, pi->size); 261082bc0194SAndreas Gruenbacher if (!err) 261182bc0194SAndreas Gruenbacher err = err2; 261282bc0194SAndreas Gruenbacher return err; 2613b411b363SPhilipp Reisner } 2614b411b363SPhilipp Reisner 2615fcefa62eSAndreas Gruenbacher /* 2616fcefa62eSAndreas Gruenbacher * Corresponding put_ldev done either below (on various errors), or in 2617fcefa62eSAndreas Gruenbacher * drbd_peer_request_endio, if we successfully submit the data at the 2618fcefa62eSAndreas Gruenbacher * end of this function. 
2619fcefa62eSAndreas Gruenbacher */ 2620b411b363SPhilipp Reisner 2621b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 2622a0fb3c47SLars Ellenberg peer_req = read_in_block(peer_device, p->block_id, sector, pi); 2623db830c46SAndreas Gruenbacher if (!peer_req) { 2624b30ab791SAndreas Gruenbacher put_ldev(device); 262582bc0194SAndreas Gruenbacher return -EIO; 2626b411b363SPhilipp Reisner } 2627b411b363SPhilipp Reisner 2628a8cd15baSAndreas Gruenbacher peer_req->w.cb = e_end_block; 262921ae5d7fSLars Ellenberg peer_req->submit_jif = jiffies; 263021ae5d7fSLars Ellenberg peer_req->flags |= EE_APPLICATION; 2631b411b363SPhilipp Reisner 2632688593c5SLars Ellenberg dp_flags = be32_to_cpu(p->dp_flags); 2633bb3cc85eSMike Christie op = wire_flags_to_bio_op(dp_flags); 2634bb3cc85eSMike Christie op_flags = wire_flags_to_bio_flags(dp_flags); 2635a0fb3c47SLars Ellenberg if (pi->cmd == P_TRIM) { 2636a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->i.size > 0); 2637f31e583aSLars Ellenberg D_ASSERT(peer_device, op == REQ_OP_DISCARD); 2638f31e583aSLars Ellenberg D_ASSERT(peer_device, peer_req->pages == NULL); 2639f31e583aSLars Ellenberg /* need to play safe: an older DRBD sender 2640f31e583aSLars Ellenberg * may mean zero-out while sending P_TRIM. */ 2641f31e583aSLars Ellenberg if (0 == (connection->agreed_features & DRBD_FF_WZEROES)) 2642f31e583aSLars Ellenberg peer_req->flags |= EE_ZEROOUT; 2643f31e583aSLars Ellenberg } else if (pi->cmd == P_ZEROES) { 2644f31e583aSLars Ellenberg D_ASSERT(peer_device, peer_req->i.size > 0); 264545c21793SChristoph Hellwig D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES); 2646a0fb3c47SLars Ellenberg D_ASSERT(peer_device, peer_req->pages == NULL); 2647f31e583aSLars Ellenberg /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? 
*/ 2648f31e583aSLars Ellenberg if (dp_flags & DP_DISCARD) 2649f31e583aSLars Ellenberg peer_req->flags |= EE_TRIM; 2650a0fb3c47SLars Ellenberg } else if (peer_req->pages == NULL) { 26510b0ba1efSAndreas Gruenbacher D_ASSERT(device, peer_req->i.size == 0); 26520b0ba1efSAndreas Gruenbacher D_ASSERT(device, dp_flags & DP_FLUSH); 2653a73ff323SLars Ellenberg } 2654688593c5SLars Ellenberg 2655688593c5SLars Ellenberg if (dp_flags & DP_MAY_SET_IN_SYNC) 2656db830c46SAndreas Gruenbacher peer_req->flags |= EE_MAY_SET_IN_SYNC; 2657688593c5SLars Ellenberg 2658bde89a9eSAndreas Gruenbacher spin_lock(&connection->epoch_lock); 2659bde89a9eSAndreas Gruenbacher peer_req->epoch = connection->current_epoch; 2660db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->epoch_size); 2661db830c46SAndreas Gruenbacher atomic_inc(&peer_req->epoch->active); 2662bde89a9eSAndreas Gruenbacher spin_unlock(&connection->epoch_lock); 2663b411b363SPhilipp Reisner 2664302bdeaeSPhilipp Reisner rcu_read_lock(); 266521ae5d7fSLars Ellenberg nc = rcu_dereference(peer_device->connection->net_conf); 266621ae5d7fSLars Ellenberg tp = nc->two_primaries; 266721ae5d7fSLars Ellenberg if (peer_device->connection->agreed_pro_version < 100) { 266821ae5d7fSLars Ellenberg switch (nc->wire_protocol) { 266921ae5d7fSLars Ellenberg case DRBD_PROT_C: 267021ae5d7fSLars Ellenberg dp_flags |= DP_SEND_WRITE_ACK; 267121ae5d7fSLars Ellenberg break; 267221ae5d7fSLars Ellenberg case DRBD_PROT_B: 267321ae5d7fSLars Ellenberg dp_flags |= DP_SEND_RECEIVE_ACK; 267421ae5d7fSLars Ellenberg break; 267521ae5d7fSLars Ellenberg } 267621ae5d7fSLars Ellenberg } 2677302bdeaeSPhilipp Reisner rcu_read_unlock(); 267821ae5d7fSLars Ellenberg 267921ae5d7fSLars Ellenberg if (dp_flags & DP_SEND_WRITE_ACK) { 268021ae5d7fSLars Ellenberg peer_req->flags |= EE_SEND_WRITE_ACK; 268121ae5d7fSLars Ellenberg inc_unacked(device); 268221ae5d7fSLars Ellenberg /* corresponding dec_unacked() in e_end_block() 268321ae5d7fSLars Ellenberg * respective _drbd_clear_done_ee 
*/ 268421ae5d7fSLars Ellenberg } 268521ae5d7fSLars Ellenberg 268621ae5d7fSLars Ellenberg if (dp_flags & DP_SEND_RECEIVE_ACK) { 268721ae5d7fSLars Ellenberg /* I really don't like it that the receiver thread 268821ae5d7fSLars Ellenberg * sends on the msock, but anyways */ 26895dd2ca19SAndreas Gruenbacher drbd_send_ack(peer_device, P_RECV_ACK, peer_req); 269021ae5d7fSLars Ellenberg } 269121ae5d7fSLars Ellenberg 2692302bdeaeSPhilipp Reisner if (tp) { 269321ae5d7fSLars Ellenberg /* two primaries implies protocol C */ 269421ae5d7fSLars Ellenberg D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); 2695302bdeaeSPhilipp Reisner peer_req->flags |= EE_IN_INTERVAL_TREE; 269669a22773SAndreas Gruenbacher err = wait_for_and_update_peer_seq(peer_device, peer_seq); 26977be8da07SAndreas Gruenbacher if (err) 2698b411b363SPhilipp Reisner goto out_interrupted; 26990500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2700b30ab791SAndreas Gruenbacher err = handle_write_conflicts(device, peer_req); 27017be8da07SAndreas Gruenbacher if (err) { 27020500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 27037be8da07SAndreas Gruenbacher if (err == -ENOENT) { 2704b30ab791SAndreas Gruenbacher put_ldev(device); 270582bc0194SAndreas Gruenbacher return 0; 2706b411b363SPhilipp Reisner } 2707b411b363SPhilipp Reisner goto out_interrupted; 2708b411b363SPhilipp Reisner } 2709b874d231SPhilipp Reisner } else { 271069a22773SAndreas Gruenbacher update_peer_seq(peer_device, peer_seq); 27110500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2712b874d231SPhilipp Reisner } 27139104d31aSLars Ellenberg /* TRIM and WRITE_SAME are processed synchronously, 27149104d31aSLars Ellenberg * we wait for all pending requests, respectively wait for 2715a0fb3c47SLars Ellenberg * active_ee to become empty in drbd_submit_peer_request(); 2716a0fb3c47SLars Ellenberg * better not add ourselves here. 
*/ 2717f31e583aSLars Ellenberg if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0) 2718b9ed7080SLars Ellenberg list_add_tail(&peer_req->w.list, &device->active_ee); 27190500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 2720b411b363SPhilipp Reisner 2721b30ab791SAndreas Gruenbacher if (device->state.conn == C_SYNC_TARGET) 2722b30ab791SAndreas Gruenbacher wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 2723b6a370baSPhilipp Reisner 2724b30ab791SAndreas Gruenbacher if (device->state.pdsk < D_INCONSISTENT) { 2725b411b363SPhilipp Reisner /* In case we have the only disk of the cluster, */ 2726b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 2727db830c46SAndreas Gruenbacher peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 27284dd726f0SLars Ellenberg drbd_al_begin_io(device, &peer_req->i); 272921ae5d7fSLars Ellenberg peer_req->flags |= EE_CALL_AL_COMPLETE_IO; 2730b411b363SPhilipp Reisner } 2731b411b363SPhilipp Reisner 2732bb3cc85eSMike Christie err = drbd_submit_peer_request(device, peer_req, op, op_flags, 2733bb3cc85eSMike Christie DRBD_FAULT_DT_WR); 273482bc0194SAndreas Gruenbacher if (!err) 273582bc0194SAndreas Gruenbacher return 0; 2736b411b363SPhilipp Reisner 273710f6d992SLars Ellenberg /* don't care for the reason here */ 2738d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 27390500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 2740a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 2741b30ab791SAndreas Gruenbacher drbd_remove_epoch_entry_interval(device, peer_req); 27420500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 274321ae5d7fSLars Ellenberg if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { 274421ae5d7fSLars Ellenberg peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; 2745b30ab791SAndreas Gruenbacher drbd_al_complete_io(device, &peer_req->i); 274621ae5d7fSLars Ellenberg } 
274722cc37a9SLars Ellenberg 2748b411b363SPhilipp Reisner out_interrupted: 27497e5fec31SFabian Frederick drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP); 2750b30ab791SAndreas Gruenbacher put_ldev(device); 2751b30ab791SAndreas Gruenbacher drbd_free_peer_req(device, peer_req); 275282bc0194SAndreas Gruenbacher return err; 2753b411b363SPhilipp Reisner } 2754b411b363SPhilipp Reisner 27550f0601f4SLars Ellenberg /* We may throttle resync, if the lower device seems to be busy, 27560f0601f4SLars Ellenberg * and current sync rate is above c_min_rate. 27570f0601f4SLars Ellenberg * 27580f0601f4SLars Ellenberg * To decide whether or not the lower device is busy, we use a scheme similar 27590f0601f4SLars Ellenberg * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" 27600f0601f4SLars Ellenberg * (more than 64 sectors) of activity we cannot account for with our own resync 27610f0601f4SLars Ellenberg * activity, it obviously is "busy". 27620f0601f4SLars Ellenberg * 27630f0601f4SLars Ellenberg * The current sync rate used here uses only the most recent two step marks, 27640f0601f4SLars Ellenberg * to have a short time average so we can react faster. 
27650f0601f4SLars Ellenberg */ 2766ad3fee79SLars Ellenberg bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, 2767ad3fee79SLars Ellenberg bool throttle_if_app_is_waiting) 2768e8299874SLars Ellenberg { 2769e8299874SLars Ellenberg struct lc_element *tmp; 2770ad3fee79SLars Ellenberg bool throttle = drbd_rs_c_min_rate_throttle(device); 2771e8299874SLars Ellenberg 2772ad3fee79SLars Ellenberg if (!throttle || throttle_if_app_is_waiting) 2773ad3fee79SLars Ellenberg return throttle; 2774e8299874SLars Ellenberg 2775e8299874SLars Ellenberg spin_lock_irq(&device->al_lock); 2776e8299874SLars Ellenberg tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); 2777e8299874SLars Ellenberg if (tmp) { 2778e8299874SLars Ellenberg struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 2779e8299874SLars Ellenberg if (test_bit(BME_PRIORITY, &bm_ext->flags)) 2780e8299874SLars Ellenberg throttle = false; 2781ad3fee79SLars Ellenberg /* Do not slow down if app IO is already waiting for this extent, 2782ad3fee79SLars Ellenberg * and our progress is necessary for application IO to complete. 
*/ 2783e8299874SLars Ellenberg } 2784e8299874SLars Ellenberg spin_unlock_irq(&device->al_lock); 2785e8299874SLars Ellenberg 2786e8299874SLars Ellenberg return throttle; 2787e8299874SLars Ellenberg } 2788e8299874SLars Ellenberg 2789e8299874SLars Ellenberg bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) 27900f0601f4SLars Ellenberg { 2791b30ab791SAndreas Gruenbacher struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; 27920f0601f4SLars Ellenberg unsigned long db, dt, dbdt; 2793daeda1ccSPhilipp Reisner unsigned int c_min_rate; 2794e8299874SLars Ellenberg int curr_events; 2795daeda1ccSPhilipp Reisner 2796daeda1ccSPhilipp Reisner rcu_read_lock(); 2797b30ab791SAndreas Gruenbacher c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; 2798daeda1ccSPhilipp Reisner rcu_read_unlock(); 27990f0601f4SLars Ellenberg 28000f0601f4SLars Ellenberg /* feature disabled? */ 2801daeda1ccSPhilipp Reisner if (c_min_rate == 0) 2802e8299874SLars Ellenberg return false; 2803e3555d85SPhilipp Reisner 280459767fbdSMichael Callahan curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - 2805b30ab791SAndreas Gruenbacher atomic_read(&device->rs_sect_ev); 2806ad3fee79SLars Ellenberg 2807ad3fee79SLars Ellenberg if (atomic_read(&device->ap_actlog_cnt) 2808ff8bd88bSLars Ellenberg || curr_events - device->rs_last_events > 64) { 28090f0601f4SLars Ellenberg unsigned long rs_left; 28100f0601f4SLars Ellenberg int i; 28110f0601f4SLars Ellenberg 2812b30ab791SAndreas Gruenbacher device->rs_last_events = curr_events; 28130f0601f4SLars Ellenberg 28140f0601f4SLars Ellenberg /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, 28150f0601f4SLars Ellenberg * approx. 
*/ 2816b30ab791SAndreas Gruenbacher i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; 28172649f080SLars Ellenberg 2818b30ab791SAndreas Gruenbacher if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 2819b30ab791SAndreas Gruenbacher rs_left = device->ov_left; 28202649f080SLars Ellenberg else 2821b30ab791SAndreas Gruenbacher rs_left = drbd_bm_total_weight(device) - device->rs_failed; 28220f0601f4SLars Ellenberg 2823b30ab791SAndreas Gruenbacher dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ; 28240f0601f4SLars Ellenberg if (!dt) 28250f0601f4SLars Ellenberg dt++; 2826b30ab791SAndreas Gruenbacher db = device->rs_mark_left[i] - rs_left; 28270f0601f4SLars Ellenberg dbdt = Bit2KB(db/dt); 28280f0601f4SLars Ellenberg 2829daeda1ccSPhilipp Reisner if (dbdt > c_min_rate) 2830e8299874SLars Ellenberg return true; 28310f0601f4SLars Ellenberg } 2832e8299874SLars Ellenberg return false; 28330f0601f4SLars Ellenberg } 28340f0601f4SLars Ellenberg 2835bde89a9eSAndreas Gruenbacher static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) 2836b411b363SPhilipp Reisner { 28379f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 2838b30ab791SAndreas Gruenbacher struct drbd_device *device; 2839b411b363SPhilipp Reisner sector_t sector; 28404a76b161SAndreas Gruenbacher sector_t capacity; 2841db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req; 2842b411b363SPhilipp Reisner struct digest_info *di = NULL; 2843b18b37beSPhilipp Reisner int size, verb; 2844b411b363SPhilipp Reisner unsigned int fault_type; 2845e658983aSAndreas Gruenbacher struct p_block_req *p = pi->data; 28464a76b161SAndreas Gruenbacher 28479f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 28489f4fe9adSAndreas Gruenbacher if (!peer_device) 28494a76b161SAndreas Gruenbacher return -EIO; 28509f4fe9adSAndreas Gruenbacher device = peer_device->device; 2851b30ab791SAndreas Gruenbacher capacity = 
drbd_get_capacity(device->this_bdev); 2852b411b363SPhilipp Reisner 2853b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 2854b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 2855b411b363SPhilipp Reisner 2856c670a398SAndreas Gruenbacher if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { 2857d0180171SAndreas Gruenbacher drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2858b411b363SPhilipp Reisner (unsigned long long)sector, size); 285982bc0194SAndreas Gruenbacher return -EINVAL; 2860b411b363SPhilipp Reisner } 2861b411b363SPhilipp Reisner if (sector + (size>>9) > capacity) { 2862d0180171SAndreas Gruenbacher drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, 2863b411b363SPhilipp Reisner (unsigned long long)sector, size); 286482bc0194SAndreas Gruenbacher return -EINVAL; 2865b411b363SPhilipp Reisner } 2866b411b363SPhilipp Reisner 2867b30ab791SAndreas Gruenbacher if (!get_ldev_if_state(device, D_UP_TO_DATE)) { 2868b18b37beSPhilipp Reisner verb = 1; 2869e2857216SAndreas Gruenbacher switch (pi->cmd) { 2870b18b37beSPhilipp Reisner case P_DATA_REQUEST: 287169a22773SAndreas Gruenbacher drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); 2872b18b37beSPhilipp Reisner break; 2873700ca8c0SPhilipp Reisner case P_RS_THIN_REQ: 2874b18b37beSPhilipp Reisner case P_RS_DATA_REQUEST: 2875b18b37beSPhilipp Reisner case P_CSUM_RS_REQUEST: 2876b18b37beSPhilipp Reisner case P_OV_REQUEST: 287769a22773SAndreas Gruenbacher drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p); 2878b18b37beSPhilipp Reisner break; 2879b18b37beSPhilipp Reisner case P_OV_REPLY: 2880b18b37beSPhilipp Reisner verb = 0; 2881b30ab791SAndreas Gruenbacher dec_rs_pending(device); 288269a22773SAndreas Gruenbacher drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); 2883b18b37beSPhilipp Reisner break; 2884b18b37beSPhilipp Reisner default: 288549ba9b1bSAndreas Gruenbacher BUG(); 2886b18b37beSPhilipp Reisner } 2887b18b37beSPhilipp Reisner 
if (verb && __ratelimit(&drbd_ratelimit_state)) 2888d0180171SAndreas Gruenbacher drbd_err(device, "Can not satisfy peer's read request, " 2889b411b363SPhilipp Reisner "no local data.\n"); 2890b18b37beSPhilipp Reisner 2891a821cc4aSLars Ellenberg /* drain possibly payload */ 289269a22773SAndreas Gruenbacher return drbd_drain_block(peer_device, pi->size); 2893b411b363SPhilipp Reisner } 2894b411b363SPhilipp Reisner 2895b411b363SPhilipp Reisner /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 2896b411b363SPhilipp Reisner * "criss-cross" setup, that might cause write-out on some other DRBD, 2897b411b363SPhilipp Reisner * which in turn might block on the other node at this very place. */ 2898a0fb3c47SLars Ellenberg peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, 28999104d31aSLars Ellenberg size, GFP_NOIO); 2900db830c46SAndreas Gruenbacher if (!peer_req) { 2901b30ab791SAndreas Gruenbacher put_ldev(device); 290282bc0194SAndreas Gruenbacher return -ENOMEM; 2903b411b363SPhilipp Reisner } 2904b411b363SPhilipp Reisner 2905e2857216SAndreas Gruenbacher switch (pi->cmd) { 2906b411b363SPhilipp Reisner case P_DATA_REQUEST: 2907a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_data_req; 2908b411b363SPhilipp Reisner fault_type = DRBD_FAULT_DT_RD; 290980a40e43SLars Ellenberg /* application IO, don't drbd_rs_begin_io */ 291021ae5d7fSLars Ellenberg peer_req->flags |= EE_APPLICATION; 291180a40e43SLars Ellenberg goto submit; 291280a40e43SLars Ellenberg 2913700ca8c0SPhilipp Reisner case P_RS_THIN_REQ: 2914700ca8c0SPhilipp Reisner /* If at some point in the future we have a smart way to 2915700ca8c0SPhilipp Reisner find out if this data block is completely deallocated, 2916700ca8c0SPhilipp Reisner then we would do something smarter here than reading 2917700ca8c0SPhilipp Reisner the block... */ 2918700ca8c0SPhilipp Reisner peer_req->flags |= EE_RS_THIN_REQ; 2919d769a992SGustavo A. R. 
Silva /* fall through */ 2920b411b363SPhilipp Reisner case P_RS_DATA_REQUEST: 2921a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_rsdata_req; 2922b411b363SPhilipp Reisner fault_type = DRBD_FAULT_RS_RD; 29235f9915bbSLars Ellenberg /* used in the sector offset progress display */ 2924b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 2925b411b363SPhilipp Reisner break; 2926b411b363SPhilipp Reisner 2927b411b363SPhilipp Reisner case P_OV_REPLY: 2928b411b363SPhilipp Reisner case P_CSUM_RS_REQUEST: 2929b411b363SPhilipp Reisner fault_type = DRBD_FAULT_RS_RD; 2930e2857216SAndreas Gruenbacher di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); 2931b411b363SPhilipp Reisner if (!di) 2932b411b363SPhilipp Reisner goto out_free_e; 2933b411b363SPhilipp Reisner 2934e2857216SAndreas Gruenbacher di->digest_size = pi->size; 2935b411b363SPhilipp Reisner di->digest = (((char *)di)+sizeof(struct digest_info)); 2936b411b363SPhilipp Reisner 2937db830c46SAndreas Gruenbacher peer_req->digest = di; 2938db830c46SAndreas Gruenbacher peer_req->flags |= EE_HAS_DIGEST; 2939c36c3cedSLars Ellenberg 29409f4fe9adSAndreas Gruenbacher if (drbd_recv_all(peer_device->connection, di->digest, pi->size)) 2941b411b363SPhilipp Reisner goto out_free_e; 2942b411b363SPhilipp Reisner 2943e2857216SAndreas Gruenbacher if (pi->cmd == P_CSUM_RS_REQUEST) { 29449f4fe9adSAndreas Gruenbacher D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89); 2945a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_csum_rs_req; 29465f9915bbSLars Ellenberg /* used in the sector offset progress display */ 2947b30ab791SAndreas Gruenbacher device->bm_resync_fo = BM_SECT_TO_BIT(sector); 2948aaaba345SLars Ellenberg /* remember to report stats in drbd_resync_finished */ 2949aaaba345SLars Ellenberg device->use_csums = true; 2950e2857216SAndreas Gruenbacher } else if (pi->cmd == P_OV_REPLY) { 29512649f080SLars Ellenberg /* track progress, we may need to throttle */ 2952b30ab791SAndreas Gruenbacher 
atomic_add(size >> 9, &device->rs_sect_in); 2953a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_ov_reply; 2954b30ab791SAndreas Gruenbacher dec_rs_pending(device); 29550f0601f4SLars Ellenberg /* drbd_rs_begin_io done when we sent this request, 29560f0601f4SLars Ellenberg * but accounting still needs to be done. */ 29570f0601f4SLars Ellenberg goto submit_for_resync; 2958b411b363SPhilipp Reisner } 2959b411b363SPhilipp Reisner break; 2960b411b363SPhilipp Reisner 2961b411b363SPhilipp Reisner case P_OV_REQUEST: 2962b30ab791SAndreas Gruenbacher if (device->ov_start_sector == ~(sector_t)0 && 29639f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90) { 2964de228bbaSLars Ellenberg unsigned long now = jiffies; 2965de228bbaSLars Ellenberg int i; 2966b30ab791SAndreas Gruenbacher device->ov_start_sector = sector; 2967b30ab791SAndreas Gruenbacher device->ov_position = sector; 2968b30ab791SAndreas Gruenbacher device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector); 2969b30ab791SAndreas Gruenbacher device->rs_total = device->ov_left; 2970de228bbaSLars Ellenberg for (i = 0; i < DRBD_SYNC_MARKS; i++) { 2971b30ab791SAndreas Gruenbacher device->rs_mark_left[i] = device->ov_left; 2972b30ab791SAndreas Gruenbacher device->rs_mark_time[i] = now; 2973de228bbaSLars Ellenberg } 2974d0180171SAndreas Gruenbacher drbd_info(device, "Online Verify start sector: %llu\n", 2975b411b363SPhilipp Reisner (unsigned long long)sector); 2976b411b363SPhilipp Reisner } 2977a8cd15baSAndreas Gruenbacher peer_req->w.cb = w_e_end_ov_req; 2978b411b363SPhilipp Reisner fault_type = DRBD_FAULT_RS_RD; 2979b411b363SPhilipp Reisner break; 2980b411b363SPhilipp Reisner 2981b411b363SPhilipp Reisner default: 298249ba9b1bSAndreas Gruenbacher BUG(); 2983b411b363SPhilipp Reisner } 2984b411b363SPhilipp Reisner 29850f0601f4SLars Ellenberg /* Throttle, drbd_rs_begin_io and submit should become asynchronous 29860f0601f4SLars Ellenberg * wrt the receiver, but it is not as straightforward as it 
may seem. 29870f0601f4SLars Ellenberg * Various places in the resync start and stop logic assume resync 29880f0601f4SLars Ellenberg * requests are processed in order, requeuing this on the worker thread 29890f0601f4SLars Ellenberg * introduces a bunch of new code for synchronization between threads. 29900f0601f4SLars Ellenberg * 29910f0601f4SLars Ellenberg * Unlimited throttling before drbd_rs_begin_io may stall the resync 29920f0601f4SLars Ellenberg * "forever", throttling after drbd_rs_begin_io will lock that extent 29930f0601f4SLars Ellenberg * for application writes for the same time. For now, just throttle 29940f0601f4SLars Ellenberg * here, where the rest of the code expects the receiver to sleep for 29950f0601f4SLars Ellenberg * a while, anyways. 29960f0601f4SLars Ellenberg */ 2997b411b363SPhilipp Reisner 29980f0601f4SLars Ellenberg /* Throttle before drbd_rs_begin_io, as that locks out application IO; 29990f0601f4SLars Ellenberg * this defers syncer requests for some time, before letting at least 30000f0601f4SLars Ellenberg * on request through. The resync controller on the receiving side 30010f0601f4SLars Ellenberg * will adapt to the incoming rate accordingly. 30020f0601f4SLars Ellenberg * 30030f0601f4SLars Ellenberg * We cannot throttle here if remote is Primary/SyncTarget: 30040f0601f4SLars Ellenberg * we would also throttle its application reads. 30050f0601f4SLars Ellenberg * In that case, throttling is done on the SyncTarget only. 30060f0601f4SLars Ellenberg */ 3007c5a2c150SLars Ellenberg 3008c5a2c150SLars Ellenberg /* Even though this may be a resync request, we do add to "read_ee"; 3009c5a2c150SLars Ellenberg * "sync_ee" is only used for resync WRITEs. 3010c5a2c150SLars Ellenberg * Add to list early, so debugfs can find this request 3011c5a2c150SLars Ellenberg * even if we have to sleep below. 
*/ 3012c5a2c150SLars Ellenberg spin_lock_irq(&device->resource->req_lock); 3013c5a2c150SLars Ellenberg list_add_tail(&peer_req->w.list, &device->read_ee); 3014c5a2c150SLars Ellenberg spin_unlock_irq(&device->resource->req_lock); 3015c5a2c150SLars Ellenberg 3016944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_rs_should_slow_down); 3017ad3fee79SLars Ellenberg if (device->state.peer != R_PRIMARY 3018ad3fee79SLars Ellenberg && drbd_rs_should_slow_down(device, sector, false)) 3019e3555d85SPhilipp Reisner schedule_timeout_uninterruptible(HZ/10); 3020944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_rs_begin_io); 3021b30ab791SAndreas Gruenbacher if (drbd_rs_begin_io(device, sector)) 302280a40e43SLars Ellenberg goto out_free_e; 3023b411b363SPhilipp Reisner 30240f0601f4SLars Ellenberg submit_for_resync: 3025b30ab791SAndreas Gruenbacher atomic_add(size >> 9, &device->rs_sect_ev); 30260f0601f4SLars Ellenberg 302780a40e43SLars Ellenberg submit: 3028944410e9SLars Ellenberg update_receiver_timing_details(connection, drbd_submit_peer_request); 3029b30ab791SAndreas Gruenbacher inc_unacked(device); 3030bb3cc85eSMike Christie if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, 3031bb3cc85eSMike Christie fault_type) == 0) 303282bc0194SAndreas Gruenbacher return 0; 3033b411b363SPhilipp Reisner 303410f6d992SLars Ellenberg /* don't care for the reason here */ 3035d0180171SAndreas Gruenbacher drbd_err(device, "submit failed, triggering re-connect\n"); 3036c5a2c150SLars Ellenberg 3037c5a2c150SLars Ellenberg out_free_e: 30380500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 3039a8cd15baSAndreas Gruenbacher list_del(&peer_req->w.list); 30400500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 304122cc37a9SLars Ellenberg /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 304222cc37a9SLars Ellenberg 3043b30ab791SAndreas Gruenbacher put_ldev(device); 3044b30ab791SAndreas 
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}

/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 * @peer_device: DRBD peer device; policy is read from its connection's net_conf.
 *
 * Applies the configured after-sb-0pri policy, using the "was primary" bit
 * (bit 0) of the local and peer bitmap UUIDs and the amount of changed data
 * on each side (device->comm_bm_set vs. the peer's UI_SIZE slot) to arbitrate.
 *
 * Return: 1 (become sync source) or -1 (become sync target) by the same
 * convention as the hg value table above drbd_uuid_compare(); -100 if no
 * automatic decision could be reached.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* bit 0 of the bitmap UUID records whether the node was primary */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies are only meaningful with >= 1 primary;
		 * reaching here with zero primaries is a config error */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through - to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* neither side changed anything: break the tie via the
			 * RESOLVE_CONFLICTS flag so both nodes agree */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* else, fall through */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}

/**
 * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 * @peer_device: DRBD peer device; policy is read from its connection's net_conf.
 *
 * Applies the configured after-sb-1pri policy.  Several policies delegate to
 * drbd_asb_recover_0p() and then only accept its verdict if it favors the
 * current secondary.  ASB_CALL_HELPER may demote the local primary and run
 * the "pri-lost-after-sb" helper if demotion fails.
 *
 * Return: 1 / -1 sync direction as in drbd_asb_recover_0p(), -100 if
 * undecided.
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		/* 0-primary policies are invalid here */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* only accept the 0p verdict when it sacrifices the secondary */
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1  && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

/**
 * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
 * @peer_device: DRBD peer device; policy is read from its connection's net_conf.
 *
 * Applies the configured after-sb-2pri policy.  Only ASB_DISCONNECT,
 * ASB_VIOLENTLY and ASB_CALL_HELPER are valid with two primaries; for
 * ASB_CALL_HELPER a local node losing the arbitration is demoted (or the
 * "pri-lost-after-sb" helper is invoked if demotion fails).
 *
 * Return: 1 / -1 sync direction as in drbd_asb_recover_0p(), -100 if
 * undecided.
 */
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_2p;

	rcu_read_lock();
	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
	rcu_read_unlock();
	switch (after_sb_2p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_DISCARD_ZERO_CHG:
		/* policies requiring <= 1 primary are invalid here */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

/* Log the four generation UUIDs plus bitmap bits and flags on one line.
 * @text tags the line, e.g. "self" or "peer" (see callers below). */
static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	if (!uuid) {
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
		return;
	}
	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
		  text,
		  (unsigned long long)uuid[UI_CURRENT],
		  (unsigned long long)uuid[UI_BITMAP],
		  (unsigned long long)uuid[UI_HISTORY_START],
		  (unsigned long long)uuid[UI_HISTORY_END],
		  (unsigned long long)bits,
		  (unsigned long long)flags);
}

/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091   requires proto 91
-1096   requires proto 96
 */

/**
 * drbd_uuid_compare  -  Compare local and peer data generation UUIDs
 * @device: DRBD device; caller must hold a local disk reference.
 * @peer_role: role the peer reported during the handshake.
 * @rule_nr: out-parameter; number of the rule that decided (for logging).
 *
 * Walks the numbered rules, comparing current, bitmap and history UUIDs of
 * both nodes (always with the "was primary" bit 0 masked off).  Rules 34-37
 * and 51/71 additionally repair UUID state after a lost P_SYNC_UUID packet.
 *
 * Return: a value from the table above.  Values < -0x10000 encode a
 * required protocol version and feature-flag byte (see rule 41).
 */
static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	/* NOTE(review): connection may be NULL if no peer_device exists, yet it
	 * is dereferenced unconditionally below; presumably callers guarantee a
	 * peer_device here -- confirm. */
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* rule 10: both just created -> nothing to sync */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* rule 20: only we are brand new -> full sync from peer */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* rule 30: only the peer is brand new -> full sync to peer */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		/* Neither has the "crashed primary" flag set,
		 * only a replication link hickup. */
		if (rct == 0)
			return 0;

		/* Current UUID equal and no bitmap uuid; does not necessarily
		 * mean this was a "simultaneous hard crash", maybe IO was
		 * frozen, so no UUID-bump happened.
		 * This is a protocol change, overload DRBD_FF_WSAME as flag
		 * for "new-enough" peer DRBD version. */
		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
			*rule_nr = 41;
			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
			}
			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
				/* At least one has the "crashed primary" bit set,
				 * both are primary now, but neither has rotated its UUIDs?
				 * "Can not happen." */
				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
				return -100;
			}
			if (device->state.role == R_PRIMARY)
				return 1;
			return -1;
		}

		/* Both are secondary.
		 * Really looks like recovery from simultaneous hard crash.
		 * Check which had been primary before, and arbitrate. */
		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* rule 50: our current UUID is the peer's bitmap UUID -> sync target */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* rule 60: our current UUID in the peer's history -> full sync from peer */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* rule 70: our bitmap UUID is the peer's current UUID -> sync source */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* rule 80: the peer's current UUID in our history -> full sync to peer */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* rule 90: identical non-zero bitmap UUIDs -> split brain, try auto recover */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* rule 100: any common ancestor in the histories -> split brain, disconnect */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	return -1000;
}

/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	/* hg ("handshake grade") encodes the sync decision:
	 *   hg > 0: this node becomes sync source, hg < 0: sync target
	 *   |hg| >= 2: full sync required, |hg| == 100: split brain
	 * (see the mapping to C_WF_BITMAP_S/T at the end of this function) */
	int hg, rule_nr, rr_conflict, tentative, always_asbp;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* uuid_lock protects md.uuid against concurrent updates while we
	 * dump and compare the UUID sets of both nodes */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, peer_role, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	/* -1000: no relation between the UUID sets could be established */
	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* values below -0x10000 encode a required protocol version (low byte)
	 * and required feature flags (next byte) the peer lacks */
	if (hg < -0x10000) {
		int proto, fflags;
		hg = -hg;
		proto = hg & 0xff;
		fflags = (hg >> 8) & 0xff;
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
					proto, fflags);
		return C_MASK;
	}
	/* -(1000 + v): resolving requires at least protocol version v */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* If exactly one side is Inconsistent, disk states override the UUID
	 * verdict: the consistent side becomes the sync source. */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT))  {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2; /* keep the "full sync" property of the old verdict */
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	always_asbp = nc->always_asbp;
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	/* Automatic split-brain recovery, keyed by the number of primaries
	 * involved (after-sb-0pri / -1pri / -2pri policies). */
	if (hg == 100 || (hg == -100 && always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Manual split-brain resolution via the discard-my-data flag:
	 * exactly one side having it set decides the sync direction. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		/* Becoming sync target while primary violates the stable-data
		 * assumption; react per the configured rr-conflict policy. */
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* dry-run: report the decision, then deliberately fail the handshake */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}

/* Map the peer's after-split-brain policy into our point of view:
 * "discard remote" on the peer's side means "discard local" on ours,
 * and vice versa; symmetric policies pass through unchanged. */
static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
{
	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
	if (peer == ASB_DISCARD_REMOTE)
		return ASB_DISCARD_LOCAL;

	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
	if (peer == ASB_DISCARD_LOCAL)
		return ASB_DISCARD_REMOTE;

	/* everything else is valid if they are equal on both sides. */
	return peer;
}

/* Handle an incoming P_PROTOCOL / P_PROTOCOL_UPDATE packet:
 * check that the peer's connection settings (protocol, after-sb policies,
 * two-primaries, discard-my-data, data-integrity-alg) are compatible with
 * ours, and install the peer's data integrity algorithm plus the digest
 * buffers that go with it.
 * Returns 0 on success, a negative error code otherwise (and moves the
 * connection towards C_DISCONNECTING on incompatibility). */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_shash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* all fields arrive in network byte order */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* since protocol 87 the integrity alg name trails the packet */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* force NUL termination; peer input is untrusted */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* both sides setting discard-my-data would deadlock the
		 * split-brain resolution */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
		if (IS_ERR(peer_integrity_tfm)) {
			peer_integrity_tfm = NULL;
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* publish the updated net_conf via RCU, under both the data mutex
	 * and the resource-wide conf_update mutex */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* swap in the new integrity tfm and digest buffers, freeing the old */
	crypto_free_shash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* wait for all RCU readers of old_net_conf before freeing it */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_shash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *	   ERR_PTR(error) if something goes wrong
 *	   or the crypto hash ptr, if it worked out ok.
*/ 38223d0e6375SKees Cook static struct crypto_shash *drbd_crypto_alloc_digest_safe( 38233d0e6375SKees Cook const struct drbd_device *device, 3824b411b363SPhilipp Reisner const char *alg, const char *name) 3825b411b363SPhilipp Reisner { 38263d0e6375SKees Cook struct crypto_shash *tfm; 3827b411b363SPhilipp Reisner 3828b411b363SPhilipp Reisner if (!alg[0]) 3829b411b363SPhilipp Reisner return NULL; 3830b411b363SPhilipp Reisner 38313d0e6375SKees Cook tfm = crypto_alloc_shash(alg, 0, 0); 3832b411b363SPhilipp Reisner if (IS_ERR(tfm)) { 3833d0180171SAndreas Gruenbacher drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", 3834b411b363SPhilipp Reisner alg, name, PTR_ERR(tfm)); 3835b411b363SPhilipp Reisner return tfm; 3836b411b363SPhilipp Reisner } 3837b411b363SPhilipp Reisner return tfm; 3838b411b363SPhilipp Reisner } 3839b411b363SPhilipp Reisner 3840bde89a9eSAndreas Gruenbacher static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi) 3841b411b363SPhilipp Reisner { 3842bde89a9eSAndreas Gruenbacher void *buffer = connection->data.rbuf; 38434a76b161SAndreas Gruenbacher int size = pi->size; 38444a76b161SAndreas Gruenbacher 38454a76b161SAndreas Gruenbacher while (size) { 38464a76b161SAndreas Gruenbacher int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); 3847bde89a9eSAndreas Gruenbacher s = drbd_recv(connection, buffer, s); 38484a76b161SAndreas Gruenbacher if (s <= 0) { 38494a76b161SAndreas Gruenbacher if (s < 0) 38504a76b161SAndreas Gruenbacher return s; 38514a76b161SAndreas Gruenbacher break; 38524a76b161SAndreas Gruenbacher } 38534a76b161SAndreas Gruenbacher size -= s; 38544a76b161SAndreas Gruenbacher } 38554a76b161SAndreas Gruenbacher if (size) 38564a76b161SAndreas Gruenbacher return -EIO; 38574a76b161SAndreas Gruenbacher return 0; 38584a76b161SAndreas Gruenbacher } 38594a76b161SAndreas Gruenbacher 38604a76b161SAndreas Gruenbacher /* 38614a76b161SAndreas Gruenbacher * config_unknown_volume - device configuration command for 
unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* consume the payload so the stream stays in sync */
	return ignore_remaining_packet(connection, pi);
}

/* Handle a P_SYNC_PARAM* packet: pick up the peer's resync parameters
 * (resync rate, resync controller settings, verify-alg and csums-alg)
 * and install them in our net_conf / disk_conf via RCU.
 * The on-wire layout depends on the agreed protocol version (apv).
 * Returns 0 on success or a negative error code; on bad/incompatible
 * settings the connection is forced towards C_DISCONNECTING. */
static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_shash *verify_tfm = NULL;
	struct crypto_shash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	/* maximum acceptable packet size depends on the protocol version */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	/* apv 88 carries the alg name as trailing data; newer versions
	 * embed fixed-size alg fields in the header itself */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	/* conf_update serializes all configuration changes on the resource;
	 * held across the whole update, released on every exit path below */
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					 "peer wants %u, accepting only up to %u byte\n",
					 data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			/* changing the alg mid-handshake is not allowed */
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv 95 added the dynamic resync-rate controller settings */
		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed");
					put_ldev(device);
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_shash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_shash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	/* wait for RCU readers of the replaced objects before freeing them */
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok.
 */
	crypto_free_shash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_shash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t d;
	/* a size of zero means "unknown/not configured"; nothing to compare */
	if (a == 0 || b == 0)
		return;
	d = (a > b) ? (a - b) : (b - a);
	/* |a - b| > a/8 or > b/8, i.e. more than 12.5% apart */
	if (d > (a>>3) || d > (b>>3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
			  (unsigned long long)a, (unsigned long long)b);
}

/*
 * receive_sizes() - handle a P_SIZES packet from the peer.
 *
 * Stores the peer's reported sizes, possibly adopts the peer's requested
 * user size (disk_conf->disk_size), re-evaluates queue parameters and our
 * own device size, and triggers a resync after online grow if needed.
 * Returns 0 on success, -EIO to tear down the connection, -ENOMEM on
 * allocation failure.
 */
static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	/* optional queue limits: only present if the WSAME feature was agreed on */
	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ?
		p->qlim : NULL;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, p_csize, my_usize;
	sector_t new_size, cur_size;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;
	cur_size = drbd_get_capacity(device->this_bdev);

	/* peer's backing disk size, user-requested size, current (exposed) size */
	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);
	p_csize = be64_to_cpu(p->c_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect,
		 * or "attach" on the peer.
		 * But allow online shrinking if we are connected. */
		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
		if (new_size < cur_size &&
		    device->state.disk >= D_OUTDATED &&
		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			/* publish the new user size via RCU-protected disk_conf */
			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			/* wait for readers of old_disk_conf before freeing it */
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
				 (unsigned long)p_usize, (unsigned long)my_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_queue_parameters(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		drbd_reconsider_queue_parameters(device, device->ldev, o);
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/*
		 * I am diskless, need to accept the peer's *current* size.
		 * I must NOT accept the peers backing disk size,
		 * it may have been larger than mine all along...
		 *
		 * At this point, the peer knows more about my disk, or at
		 * least about what we last agreed upon, than myself.
		 * So if his c_size is less than his d_size, the most likely
		 * reason is that *my* d_size was smaller last time we checked.
		 *
		 * However, if he sends a zero current size,
		 * take his (user-capped or) backing disk size anyways.
		 *
		 * Unless of course he does not have a disk himself.
		 * In which case we ignore this completely.
		 */
		/* first non-zero of: peer's current size, user size, backing size */
		sector_t new_size = p_csize ?: p_usize ?: p_size;
		drbd_reconsider_queue_parameters(device, NULL, o);
		if (new_size == 0) {
			/* Ignore, peer does not know nothing. */
		} else if (new_size == cur_size) {
			/* nothing to do */
		} else if (cur_size != 0 && p_size == 0) {
			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
					(unsigned long long)new_size, (unsigned long long)cur_size);
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			return -EIO;
		} else {
			/* I believe the peer, if
			 *  - I don't have a current size myself
			 *  - we agree on the size anyways
			 *  - I do have a current size, am Secondary,
			 *    and he has the only disk
			 *  - I do have a current size, am Primary,
			 *    and he has the only disk,
			 *    which is larger than my current size
			 */
			drbd_set_my_capacity(device, new_size);
		}
	}

	if (get_ldev(device)) {
		/* remember the backing device's size so we notice out-of-band resizes */
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}

static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
Gruenbacher struct drbd_device *device; 4280e658983aSAndreas Gruenbacher struct p_uuids *p = pi->data; 4281b411b363SPhilipp Reisner u64 *p_uuid; 428262b0da3aSLars Ellenberg int i, updated_uuids = 0; 4283b411b363SPhilipp Reisner 42849f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 42859f4fe9adSAndreas Gruenbacher if (!peer_device) 4286bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 42879f4fe9adSAndreas Gruenbacher device = peer_device->device; 42884a76b161SAndreas Gruenbacher 4289365cf663SRoland Kammerer p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); 4290063eacf8SJing Wang if (!p_uuid) { 4291d0180171SAndreas Gruenbacher drbd_err(device, "kmalloc of p_uuid failed\n"); 4292063eacf8SJing Wang return false; 4293063eacf8SJing Wang } 4294b411b363SPhilipp Reisner 4295b411b363SPhilipp Reisner for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 4296b411b363SPhilipp Reisner p_uuid[i] = be64_to_cpu(p->uuid[i]); 4297b411b363SPhilipp Reisner 4298b30ab791SAndreas Gruenbacher kfree(device->p_uuid); 4299b30ab791SAndreas Gruenbacher device->p_uuid = p_uuid; 4300b411b363SPhilipp Reisner 4301b17b5960SLars Ellenberg if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) && 4302b30ab791SAndreas Gruenbacher device->state.disk < D_INCONSISTENT && 4303b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY && 4304b30ab791SAndreas Gruenbacher (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { 4305d0180171SAndreas Gruenbacher drbd_err(device, "Can only connect to data with current UUID=%016llX\n", 4306b30ab791SAndreas Gruenbacher (unsigned long long)device->ed_uuid); 43079f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 430882bc0194SAndreas Gruenbacher return -EIO; 4309b411b363SPhilipp Reisner } 4310b411b363SPhilipp Reisner 4311b30ab791SAndreas Gruenbacher if (get_ldev(device)) { 4312b411b363SPhilipp Reisner int 
skip_initial_sync = 4313b30ab791SAndreas Gruenbacher device->state.conn == C_CONNECTED && 43149f4fe9adSAndreas Gruenbacher peer_device->connection->agreed_pro_version >= 90 && 4315b30ab791SAndreas Gruenbacher device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && 4316b411b363SPhilipp Reisner (p_uuid[UI_FLAGS] & 8); 4317b411b363SPhilipp Reisner if (skip_initial_sync) { 4318d0180171SAndreas Gruenbacher drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); 4319b30ab791SAndreas Gruenbacher drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 432020ceb2b2SLars Ellenberg "clear_n_write from receive_uuids", 432120ceb2b2SLars Ellenberg BM_LOCKED_TEST_ALLOWED); 4322b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); 4323b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0); 4324b30ab791SAndreas Gruenbacher _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 4325b411b363SPhilipp Reisner CS_VERBOSE, NULL); 4326b30ab791SAndreas Gruenbacher drbd_md_sync(device); 432762b0da3aSLars Ellenberg updated_uuids = 1; 4328b411b363SPhilipp Reisner } 4329b30ab791SAndreas Gruenbacher put_ldev(device); 4330b30ab791SAndreas Gruenbacher } else if (device->state.disk < D_INCONSISTENT && 4331b30ab791SAndreas Gruenbacher device->state.role == R_PRIMARY) { 433218a50fa2SPhilipp Reisner /* I am a diskless primary, the peer just created a new current UUID 433318a50fa2SPhilipp Reisner for me. */ 4334b30ab791SAndreas Gruenbacher updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 4335b411b363SPhilipp Reisner } 4336b411b363SPhilipp Reisner 4337b411b363SPhilipp Reisner /* Before we test for the disk state, we should wait until an eventually 4338b411b363SPhilipp Reisner ongoing cluster wide state change is finished. That is important if 4339b411b363SPhilipp Reisner we are primary and are detaching from our disk. We need to see the 4340b411b363SPhilipp Reisner new disk state... 
*/ 4341b30ab791SAndreas Gruenbacher mutex_lock(device->state_mutex); 4342b30ab791SAndreas Gruenbacher mutex_unlock(device->state_mutex); 4343b30ab791SAndreas Gruenbacher if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT) 4344b30ab791SAndreas Gruenbacher updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]); 434562b0da3aSLars Ellenberg 434662b0da3aSLars Ellenberg if (updated_uuids) 4347b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "receiver updated UUIDs to"); 4348b411b363SPhilipp Reisner 434982bc0194SAndreas Gruenbacher return 0; 4350b411b363SPhilipp Reisner } 4351b411b363SPhilipp Reisner 4352b411b363SPhilipp Reisner /** 4353b411b363SPhilipp Reisner * convert_state() - Converts the peer's view of the cluster state to our point of view 4354b411b363SPhilipp Reisner * @ps: The state as seen by the peer. 4355b411b363SPhilipp Reisner */ 4356b411b363SPhilipp Reisner static union drbd_state convert_state(union drbd_state ps) 4357b411b363SPhilipp Reisner { 4358b411b363SPhilipp Reisner union drbd_state ms; 4359b411b363SPhilipp Reisner 4360b411b363SPhilipp Reisner static enum drbd_conns c_tab[] = { 4361369bea63SPhilipp Reisner [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, 4362b411b363SPhilipp Reisner [C_CONNECTED] = C_CONNECTED, 4363b411b363SPhilipp Reisner 4364b411b363SPhilipp Reisner [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, 4365b411b363SPhilipp Reisner [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, 4366b411b363SPhilipp Reisner [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ 4367b411b363SPhilipp Reisner [C_VERIFY_S] = C_VERIFY_T, 4368b411b363SPhilipp Reisner [C_MASK] = C_MASK, 4369b411b363SPhilipp Reisner }; 4370b411b363SPhilipp Reisner 4371b411b363SPhilipp Reisner ms.i = ps.i; 4372b411b363SPhilipp Reisner 4373b411b363SPhilipp Reisner ms.conn = c_tab[ps.conn]; 4374b411b363SPhilipp Reisner ms.peer = ps.role; 4375b411b363SPhilipp Reisner ms.role = ps.peer; 4376b411b363SPhilipp Reisner ms.pdsk = ps.disk; 4377b411b363SPhilipp Reisner 
ms.disk = ps.pdsk; 4378b411b363SPhilipp Reisner ms.peer_isp = (ps.aftr_isp | ps.user_isp); 4379b411b363SPhilipp Reisner 4380b411b363SPhilipp Reisner return ms; 4381b411b363SPhilipp Reisner } 4382b411b363SPhilipp Reisner 4383bde89a9eSAndreas Gruenbacher static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi) 4384b411b363SPhilipp Reisner { 43859f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4386b30ab791SAndreas Gruenbacher struct drbd_device *device; 4387e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data; 4388b411b363SPhilipp Reisner union drbd_state mask, val; 4389bf885f8aSAndreas Gruenbacher enum drbd_state_rv rv; 4390b411b363SPhilipp Reisner 43919f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 43929f4fe9adSAndreas Gruenbacher if (!peer_device) 43934a76b161SAndreas Gruenbacher return -EIO; 43949f4fe9adSAndreas Gruenbacher device = peer_device->device; 43954a76b161SAndreas Gruenbacher 4396b411b363SPhilipp Reisner mask.i = be32_to_cpu(p->mask); 4397b411b363SPhilipp Reisner val.i = be32_to_cpu(p->val); 4398b411b363SPhilipp Reisner 43999f4fe9adSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) && 4400b30ab791SAndreas Gruenbacher mutex_is_locked(device->state_mutex)) { 440169a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG); 440282bc0194SAndreas Gruenbacher return 0; 4403b411b363SPhilipp Reisner } 4404b411b363SPhilipp Reisner 4405b411b363SPhilipp Reisner mask = convert_state(mask); 4406b411b363SPhilipp Reisner val = convert_state(val); 4407b411b363SPhilipp Reisner 4408b30ab791SAndreas Gruenbacher rv = drbd_change_state(device, CS_VERBOSE, mask, val); 440969a22773SAndreas Gruenbacher drbd_send_sr_reply(peer_device, rv); 4410047cd4a6SPhilipp Reisner 4411b30ab791SAndreas Gruenbacher drbd_md_sync(device); 4412b411b363SPhilipp Reisner 441382bc0194SAndreas Gruenbacher return 0; 4414b411b363SPhilipp Reisner } 
4415b411b363SPhilipp Reisner 4416bde89a9eSAndreas Gruenbacher static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi) 4417b411b363SPhilipp Reisner { 4418e658983aSAndreas Gruenbacher struct p_req_state *p = pi->data; 4419dfafcc8aSPhilipp Reisner union drbd_state mask, val; 4420dfafcc8aSPhilipp Reisner enum drbd_state_rv rv; 4421dfafcc8aSPhilipp Reisner 4422dfafcc8aSPhilipp Reisner mask.i = be32_to_cpu(p->mask); 4423dfafcc8aSPhilipp Reisner val.i = be32_to_cpu(p->val); 4424dfafcc8aSPhilipp Reisner 4425bde89a9eSAndreas Gruenbacher if (test_bit(RESOLVE_CONFLICTS, &connection->flags) && 4426bde89a9eSAndreas Gruenbacher mutex_is_locked(&connection->cstate_mutex)) { 4427bde89a9eSAndreas Gruenbacher conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG); 442882bc0194SAndreas Gruenbacher return 0; 4429dfafcc8aSPhilipp Reisner } 4430dfafcc8aSPhilipp Reisner 4431dfafcc8aSPhilipp Reisner mask = convert_state(mask); 4432dfafcc8aSPhilipp Reisner val = convert_state(val); 4433dfafcc8aSPhilipp Reisner 4434bde89a9eSAndreas Gruenbacher rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); 4435bde89a9eSAndreas Gruenbacher conn_send_sr_reply(connection, rv); 4436dfafcc8aSPhilipp Reisner 443782bc0194SAndreas Gruenbacher return 0; 4438dfafcc8aSPhilipp Reisner } 4439dfafcc8aSPhilipp Reisner 4440bde89a9eSAndreas Gruenbacher static int receive_state(struct drbd_connection *connection, struct packet_info *pi) 4441b411b363SPhilipp Reisner { 44429f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4443b30ab791SAndreas Gruenbacher struct drbd_device *device; 4444e658983aSAndreas Gruenbacher struct p_state *p = pi->data; 44454ac4aadaSLars Ellenberg union drbd_state os, ns, peer_state; 4446b411b363SPhilipp Reisner enum drbd_disk_state real_peer_disk; 444765d922c3SPhilipp Reisner enum chg_state_flags cs_flags; 4448b411b363SPhilipp Reisner int rv; 4449b411b363SPhilipp Reisner 44509f4fe9adSAndreas Gruenbacher 
peer_device = conn_peer_device(connection, pi->vnr); 44519f4fe9adSAndreas Gruenbacher if (!peer_device) 4452bde89a9eSAndreas Gruenbacher return config_unknown_volume(connection, pi); 44539f4fe9adSAndreas Gruenbacher device = peer_device->device; 44544a76b161SAndreas Gruenbacher 4455b411b363SPhilipp Reisner peer_state.i = be32_to_cpu(p->state); 4456b411b363SPhilipp Reisner 4457b411b363SPhilipp Reisner real_peer_disk = peer_state.disk; 4458b411b363SPhilipp Reisner if (peer_state.disk == D_NEGOTIATING) { 4459b30ab791SAndreas Gruenbacher real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; 4460d0180171SAndreas Gruenbacher drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); 4461b411b363SPhilipp Reisner } 4462b411b363SPhilipp Reisner 44630500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 4464b411b363SPhilipp Reisner retry: 4465b30ab791SAndreas Gruenbacher os = ns = drbd_read_state(device); 44660500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4467b411b363SPhilipp Reisner 4468668700b4SPhilipp Reisner /* If some other part of the code (ack_receiver thread, timeout) 4469545752d5SLars Ellenberg * already decided to close the connection again, 4470545752d5SLars Ellenberg * we must not "re-establish" it here. */ 4471545752d5SLars Ellenberg if (os.conn <= C_TEAR_DOWN) 447258ffa580SLars Ellenberg return -ECONNRESET; 4473545752d5SLars Ellenberg 447440424e4aSLars Ellenberg /* If this is the "end of sync" confirmation, usually the peer disk 447540424e4aSLars Ellenberg * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits 447640424e4aSLars Ellenberg * set) resync started in PausedSyncT, or if the timing of pause-/ 447740424e4aSLars Ellenberg * unpause-sync events has been "just right", the peer disk may 447840424e4aSLars Ellenberg * transition from D_CONSISTENT to D_UP_TO_DATE as well. 
447940424e4aSLars Ellenberg */ 448040424e4aSLars Ellenberg if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && 448140424e4aSLars Ellenberg real_peer_disk == D_UP_TO_DATE && 4482e9ef7bb6SLars Ellenberg os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { 4483e9ef7bb6SLars Ellenberg /* If we are (becoming) SyncSource, but peer is still in sync 4484e9ef7bb6SLars Ellenberg * preparation, ignore its uptodate-ness to avoid flapping, it 4485e9ef7bb6SLars Ellenberg * will change to inconsistent once the peer reaches active 4486e9ef7bb6SLars Ellenberg * syncing states. 4487e9ef7bb6SLars Ellenberg * It may have changed syncer-paused flags, however, so we 4488e9ef7bb6SLars Ellenberg * cannot ignore this completely. */ 4489e9ef7bb6SLars Ellenberg if (peer_state.conn > C_CONNECTED && 4490e9ef7bb6SLars Ellenberg peer_state.conn < C_SYNC_SOURCE) 4491e9ef7bb6SLars Ellenberg real_peer_disk = D_INCONSISTENT; 4492e9ef7bb6SLars Ellenberg 4493e9ef7bb6SLars Ellenberg /* if peer_state changes to connected at the same time, 4494e9ef7bb6SLars Ellenberg * it explicitly notifies us that it finished resync. 4495e9ef7bb6SLars Ellenberg * Maybe we should finish it up, too? */ 4496e9ef7bb6SLars Ellenberg else if (os.conn >= C_SYNC_SOURCE && 4497e9ef7bb6SLars Ellenberg peer_state.conn == C_CONNECTED) { 4498b30ab791SAndreas Gruenbacher if (drbd_bm_total_weight(device) <= device->rs_failed) 4499b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 450082bc0194SAndreas Gruenbacher return 0; 4501e9ef7bb6SLars Ellenberg } 4502e9ef7bb6SLars Ellenberg } 4503e9ef7bb6SLars Ellenberg 450402b91b55SLars Ellenberg /* explicit verify finished notification, stop sector reached. 
*/ 450502b91b55SLars Ellenberg if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && 450602b91b55SLars Ellenberg peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { 4507b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 4508b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 450958ffa580SLars Ellenberg return 0; 451002b91b55SLars Ellenberg } 451102b91b55SLars Ellenberg 4512e9ef7bb6SLars Ellenberg /* peer says his disk is inconsistent, while we think it is uptodate, 4513e9ef7bb6SLars Ellenberg * and this happens while the peer still thinks we have a sync going on, 4514e9ef7bb6SLars Ellenberg * but we think we are already done with the sync. 4515e9ef7bb6SLars Ellenberg * We ignore this to avoid flapping pdsk. 4516e9ef7bb6SLars Ellenberg * This should not happen, if the peer is a recent version of drbd. */ 4517e9ef7bb6SLars Ellenberg if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT && 4518e9ef7bb6SLars Ellenberg os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE) 4519e9ef7bb6SLars Ellenberg real_peer_disk = D_UP_TO_DATE; 4520e9ef7bb6SLars Ellenberg 45214ac4aadaSLars Ellenberg if (ns.conn == C_WF_REPORT_PARAMS) 45224ac4aadaSLars Ellenberg ns.conn = C_CONNECTED; 4523b411b363SPhilipp Reisner 452467531718SPhilipp Reisner if (peer_state.conn == C_AHEAD) 452567531718SPhilipp Reisner ns.conn = C_BEHIND; 452667531718SPhilipp Reisner 4527fe43ed97SLars Ellenberg /* TODO: 4528fe43ed97SLars Ellenberg * if (primary and diskless and peer uuid != effective uuid) 4529fe43ed97SLars Ellenberg * abort attach on peer; 4530fe43ed97SLars Ellenberg * 4531fe43ed97SLars Ellenberg * If this node does not have good data, was already connected, but 4532fe43ed97SLars Ellenberg * the peer did a late attach only now, trying to "negotiate" with me, 4533fe43ed97SLars Ellenberg * AND I am currently Primary, possibly frozen, with some specific 4534fe43ed97SLars Ellenberg * "effective" uuid, this should never be reached, really, because 
4535fe43ed97SLars Ellenberg * we first send the uuids, then the current state. 4536fe43ed97SLars Ellenberg * 4537fe43ed97SLars Ellenberg * In this scenario, we already dropped the connection hard 4538fe43ed97SLars Ellenberg * when we received the unsuitable uuids (receive_uuids(). 4539fe43ed97SLars Ellenberg * 4540fe43ed97SLars Ellenberg * Should we want to change this, that is: not drop the connection in 4541fe43ed97SLars Ellenberg * receive_uuids() already, then we would need to add a branch here 4542fe43ed97SLars Ellenberg * that aborts the attach of "unsuitable uuids" on the peer in case 4543fe43ed97SLars Ellenberg * this node is currently Diskless Primary. 4544fe43ed97SLars Ellenberg */ 4545fe43ed97SLars Ellenberg 4546b30ab791SAndreas Gruenbacher if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && 4547b30ab791SAndreas Gruenbacher get_ldev_if_state(device, D_NEGOTIATING)) { 4548b411b363SPhilipp Reisner int cr; /* consider resync */ 4549b411b363SPhilipp Reisner 4550b411b363SPhilipp Reisner /* if we established a new connection */ 45514ac4aadaSLars Ellenberg cr = (os.conn < C_CONNECTED); 4552b411b363SPhilipp Reisner /* if we had an established connection 4553b411b363SPhilipp Reisner * and one of the nodes newly attaches a disk */ 45544ac4aadaSLars Ellenberg cr |= (os.conn == C_CONNECTED && 4555b411b363SPhilipp Reisner (peer_state.disk == D_NEGOTIATING || 45564ac4aadaSLars Ellenberg os.disk == D_NEGOTIATING)); 4557b411b363SPhilipp Reisner /* if we have both been inconsistent, and the peer has been 4558a2823ea9SLars Ellenberg * forced to be UpToDate with --force */ 4559b30ab791SAndreas Gruenbacher cr |= test_bit(CONSIDER_RESYNC, &device->flags); 4560b411b363SPhilipp Reisner /* if we had been plain connected, and the admin requested to 4561b411b363SPhilipp Reisner * start a sync by "invalidate" or "invalidate-remote" */ 45624ac4aadaSLars Ellenberg cr |= (os.conn == C_CONNECTED && 4563b411b363SPhilipp Reisner (peer_state.conn >= C_STARTING_SYNC_S && 
4564b411b363SPhilipp Reisner peer_state.conn <= C_WF_BITMAP_T)); 4565b411b363SPhilipp Reisner 4566b411b363SPhilipp Reisner if (cr) 456769a22773SAndreas Gruenbacher ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk); 4568b411b363SPhilipp Reisner 4569b30ab791SAndreas Gruenbacher put_ldev(device); 45704ac4aadaSLars Ellenberg if (ns.conn == C_MASK) { 45714ac4aadaSLars Ellenberg ns.conn = C_CONNECTED; 4572b30ab791SAndreas Gruenbacher if (device->state.disk == D_NEGOTIATING) { 4573b30ab791SAndreas Gruenbacher drbd_force_state(device, NS(disk, D_FAILED)); 4574b411b363SPhilipp Reisner } else if (peer_state.disk == D_NEGOTIATING) { 4575d0180171SAndreas Gruenbacher drbd_err(device, "Disk attach process on the peer node was aborted.\n"); 4576b411b363SPhilipp Reisner peer_state.disk = D_DISKLESS; 4577580b9767SLars Ellenberg real_peer_disk = D_DISKLESS; 4578b411b363SPhilipp Reisner } else { 45799f4fe9adSAndreas Gruenbacher if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags)) 458082bc0194SAndreas Gruenbacher return -EIO; 45810b0ba1efSAndreas Gruenbacher D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS); 45829f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 458382bc0194SAndreas Gruenbacher return -EIO; 4584b411b363SPhilipp Reisner } 4585b411b363SPhilipp Reisner } 4586b411b363SPhilipp Reisner } 4587b411b363SPhilipp Reisner 45880500813fSAndreas Gruenbacher spin_lock_irq(&device->resource->req_lock); 4589b30ab791SAndreas Gruenbacher if (os.i != drbd_read_state(device).i) 4590b411b363SPhilipp Reisner goto retry; 4591b30ab791SAndreas Gruenbacher clear_bit(CONSIDER_RESYNC, &device->flags); 4592b411b363SPhilipp Reisner ns.peer = peer_state.role; 4593b411b363SPhilipp Reisner ns.pdsk = real_peer_disk; 4594b411b363SPhilipp Reisner ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); 45954ac4aadaSLars Ellenberg if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == 
D_NEGOTIATING) 4596b30ab791SAndreas Gruenbacher ns.disk = device->new_state_tmp.disk; 45974ac4aadaSLars Ellenberg cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); 4598b30ab791SAndreas Gruenbacher if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && 4599b30ab791SAndreas Gruenbacher test_bit(NEW_CUR_UUID, &device->flags)) { 46008554df1cSAndreas Gruenbacher /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this 4601481c6f50SPhilipp Reisner for temporal network outages! */ 46020500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4603d0180171SAndreas Gruenbacher drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); 46049f4fe9adSAndreas Gruenbacher tl_clear(peer_device->connection); 4605b30ab791SAndreas Gruenbacher drbd_uuid_new_current(device); 4606b30ab791SAndreas Gruenbacher clear_bit(NEW_CUR_UUID, &device->flags); 46079f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); 460882bc0194SAndreas Gruenbacher return -EIO; 4609481c6f50SPhilipp Reisner } 4610b30ab791SAndreas Gruenbacher rv = _drbd_set_state(device, ns, cs_flags, NULL); 4611b30ab791SAndreas Gruenbacher ns = drbd_read_state(device); 46120500813fSAndreas Gruenbacher spin_unlock_irq(&device->resource->req_lock); 4613b411b363SPhilipp Reisner 4614b411b363SPhilipp Reisner if (rv < SS_SUCCESS) { 46159f4fe9adSAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); 461682bc0194SAndreas Gruenbacher return -EIO; 4617b411b363SPhilipp Reisner } 4618b411b363SPhilipp Reisner 46194ac4aadaSLars Ellenberg if (os.conn > C_WF_REPORT_PARAMS) { 46204ac4aadaSLars Ellenberg if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED && 4621b411b363SPhilipp Reisner peer_state.disk != D_NEGOTIATING ) { 4622b411b363SPhilipp Reisner /* we want resync, peer 
has not yet decided to sync... */ 4623b411b363SPhilipp Reisner /* Nowadays only used when forcing a node into primary role and 4624b411b363SPhilipp Reisner setting its disk to UpToDate with that */ 462569a22773SAndreas Gruenbacher drbd_send_uuids(peer_device); 462669a22773SAndreas Gruenbacher drbd_send_current_state(peer_device); 4627b411b363SPhilipp Reisner } 4628b411b363SPhilipp Reisner } 4629b411b363SPhilipp Reisner 4630b30ab791SAndreas Gruenbacher clear_bit(DISCARD_MY_DATA, &device->flags); 4631b411b363SPhilipp Reisner 4632b30ab791SAndreas Gruenbacher drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */ 4633b411b363SPhilipp Reisner 463482bc0194SAndreas Gruenbacher return 0; 4635b411b363SPhilipp Reisner } 4636b411b363SPhilipp Reisner 4637bde89a9eSAndreas Gruenbacher static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi) 4638b411b363SPhilipp Reisner { 46399f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4640b30ab791SAndreas Gruenbacher struct drbd_device *device; 4641e658983aSAndreas Gruenbacher struct p_rs_uuid *p = pi->data; 46424a76b161SAndreas Gruenbacher 46439f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 46449f4fe9adSAndreas Gruenbacher if (!peer_device) 46454a76b161SAndreas Gruenbacher return -EIO; 46469f4fe9adSAndreas Gruenbacher device = peer_device->device; 4647b411b363SPhilipp Reisner 4648b30ab791SAndreas Gruenbacher wait_event(device->misc_wait, 4649b30ab791SAndreas Gruenbacher device->state.conn == C_WF_SYNC_UUID || 4650b30ab791SAndreas Gruenbacher device->state.conn == C_BEHIND || 4651b30ab791SAndreas Gruenbacher device->state.conn < C_CONNECTED || 4652b30ab791SAndreas Gruenbacher device->state.disk < D_NEGOTIATING); 4653b411b363SPhilipp Reisner 46540b0ba1efSAndreas Gruenbacher /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */ 4655b411b363SPhilipp Reisner 4656b411b363SPhilipp Reisner /* Here the _drbd_uuid_ functions are right, 
current should 4657b411b363SPhilipp Reisner _not_ be rotated into the history */ 4658b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_NEGOTIATING)) { 4659b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid)); 4660b30ab791SAndreas Gruenbacher _drbd_uuid_set(device, UI_BITMAP, 0UL); 4661b411b363SPhilipp Reisner 4662b30ab791SAndreas Gruenbacher drbd_print_uuids(device, "updated sync uuid"); 4663b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_TARGET); 4664b411b363SPhilipp Reisner 4665b30ab791SAndreas Gruenbacher put_ldev(device); 4666b411b363SPhilipp Reisner } else 4667d0180171SAndreas Gruenbacher drbd_err(device, "Ignoring SyncUUID packet!\n"); 4668b411b363SPhilipp Reisner 466982bc0194SAndreas Gruenbacher return 0; 4670b411b363SPhilipp Reisner } 4671b411b363SPhilipp Reisner 46722c46407dSAndreas Gruenbacher /** 46732c46407dSAndreas Gruenbacher * receive_bitmap_plain 46742c46407dSAndreas Gruenbacher * 46752c46407dSAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error 46762c46407dSAndreas Gruenbacher * code upon failure. 
46772c46407dSAndreas Gruenbacher */ 46782c46407dSAndreas Gruenbacher static int 467969a22773SAndreas Gruenbacher receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size, 4680e658983aSAndreas Gruenbacher unsigned long *p, struct bm_xfer_ctx *c) 4681b411b363SPhilipp Reisner { 468250d0b1adSAndreas Gruenbacher unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - 468369a22773SAndreas Gruenbacher drbd_header_size(peer_device->connection); 4684e658983aSAndreas Gruenbacher unsigned int num_words = min_t(size_t, data_size / sizeof(*p), 468550d0b1adSAndreas Gruenbacher c->bm_words - c->word_offset); 4686e658983aSAndreas Gruenbacher unsigned int want = num_words * sizeof(*p); 46872c46407dSAndreas Gruenbacher int err; 4688b411b363SPhilipp Reisner 468950d0b1adSAndreas Gruenbacher if (want != size) { 469069a22773SAndreas Gruenbacher drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size); 46912c46407dSAndreas Gruenbacher return -EIO; 4692b411b363SPhilipp Reisner } 4693b411b363SPhilipp Reisner if (want == 0) 46942c46407dSAndreas Gruenbacher return 0; 469569a22773SAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, want); 469682bc0194SAndreas Gruenbacher if (err) 46972c46407dSAndreas Gruenbacher return err; 4698b411b363SPhilipp Reisner 469969a22773SAndreas Gruenbacher drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p); 4700b411b363SPhilipp Reisner 4701b411b363SPhilipp Reisner c->word_offset += num_words; 4702b411b363SPhilipp Reisner c->bit_offset = c->word_offset * BITS_PER_LONG; 4703b411b363SPhilipp Reisner if (c->bit_offset > c->bm_bits) 4704b411b363SPhilipp Reisner c->bit_offset = c->bm_bits; 4705b411b363SPhilipp Reisner 47062c46407dSAndreas Gruenbacher return 1; 4707b411b363SPhilipp Reisner } 4708b411b363SPhilipp Reisner 4709a02d1240SAndreas Gruenbacher static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p) 4710a02d1240SAndreas Gruenbacher { 4711a02d1240SAndreas Gruenbacher return (enum 
drbd_bitmap_code)(p->encoding & 0x0f); 4712a02d1240SAndreas Gruenbacher } 4713a02d1240SAndreas Gruenbacher 4714a02d1240SAndreas Gruenbacher static int dcbp_get_start(struct p_compressed_bm *p) 4715a02d1240SAndreas Gruenbacher { 4716a02d1240SAndreas Gruenbacher return (p->encoding & 0x80) != 0; 4717a02d1240SAndreas Gruenbacher } 4718a02d1240SAndreas Gruenbacher 4719a02d1240SAndreas Gruenbacher static int dcbp_get_pad_bits(struct p_compressed_bm *p) 4720a02d1240SAndreas Gruenbacher { 4721a02d1240SAndreas Gruenbacher return (p->encoding >> 4) & 0x7; 4722a02d1240SAndreas Gruenbacher } 4723a02d1240SAndreas Gruenbacher 47242c46407dSAndreas Gruenbacher /** 47252c46407dSAndreas Gruenbacher * recv_bm_rle_bits 47262c46407dSAndreas Gruenbacher * 47272c46407dSAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error 47282c46407dSAndreas Gruenbacher * code upon failure. 47292c46407dSAndreas Gruenbacher */ 47302c46407dSAndreas Gruenbacher static int 473169a22773SAndreas Gruenbacher recv_bm_rle_bits(struct drbd_peer_device *peer_device, 4732b411b363SPhilipp Reisner struct p_compressed_bm *p, 4733c6d25cfeSPhilipp Reisner struct bm_xfer_ctx *c, 4734c6d25cfeSPhilipp Reisner unsigned int len) 4735b411b363SPhilipp Reisner { 4736b411b363SPhilipp Reisner struct bitstream bs; 4737b411b363SPhilipp Reisner u64 look_ahead; 4738b411b363SPhilipp Reisner u64 rl; 4739b411b363SPhilipp Reisner u64 tmp; 4740b411b363SPhilipp Reisner unsigned long s = c->bit_offset; 4741b411b363SPhilipp Reisner unsigned long e; 4742a02d1240SAndreas Gruenbacher int toggle = dcbp_get_start(p); 4743b411b363SPhilipp Reisner int have; 4744b411b363SPhilipp Reisner int bits; 4745b411b363SPhilipp Reisner 4746a02d1240SAndreas Gruenbacher bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p)); 4747b411b363SPhilipp Reisner 4748b411b363SPhilipp Reisner bits = bitstream_get_bits(&bs, &look_ahead, 64); 4749b411b363SPhilipp Reisner if (bits < 0) 47502c46407dSAndreas Gruenbacher return 
-EIO; 4751b411b363SPhilipp Reisner 4752b411b363SPhilipp Reisner for (have = bits; have > 0; s += rl, toggle = !toggle) { 4753b411b363SPhilipp Reisner bits = vli_decode_bits(&rl, look_ahead); 4754b411b363SPhilipp Reisner if (bits <= 0) 47552c46407dSAndreas Gruenbacher return -EIO; 4756b411b363SPhilipp Reisner 4757b411b363SPhilipp Reisner if (toggle) { 4758b411b363SPhilipp Reisner e = s + rl -1; 4759b411b363SPhilipp Reisner if (e >= c->bm_bits) { 476069a22773SAndreas Gruenbacher drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); 47612c46407dSAndreas Gruenbacher return -EIO; 4762b411b363SPhilipp Reisner } 476369a22773SAndreas Gruenbacher _drbd_bm_set_bits(peer_device->device, s, e); 4764b411b363SPhilipp Reisner } 4765b411b363SPhilipp Reisner 4766b411b363SPhilipp Reisner if (have < bits) { 476769a22773SAndreas Gruenbacher drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", 4768b411b363SPhilipp Reisner have, bits, look_ahead, 4769b411b363SPhilipp Reisner (unsigned int)(bs.cur.b - p->code), 4770b411b363SPhilipp Reisner (unsigned int)bs.buf_len); 47712c46407dSAndreas Gruenbacher return -EIO; 4772b411b363SPhilipp Reisner } 4773d2da5b0cSLars Ellenberg /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */ 4774d2da5b0cSLars Ellenberg if (likely(bits < 64)) 4775b411b363SPhilipp Reisner look_ahead >>= bits; 4776d2da5b0cSLars Ellenberg else 4777d2da5b0cSLars Ellenberg look_ahead = 0; 4778b411b363SPhilipp Reisner have -= bits; 4779b411b363SPhilipp Reisner 4780b411b363SPhilipp Reisner bits = bitstream_get_bits(&bs, &tmp, 64 - have); 4781b411b363SPhilipp Reisner if (bits < 0) 47822c46407dSAndreas Gruenbacher return -EIO; 4783b411b363SPhilipp Reisner look_ahead |= tmp << have; 4784b411b363SPhilipp Reisner have += bits; 4785b411b363SPhilipp Reisner } 4786b411b363SPhilipp Reisner 4787b411b363SPhilipp Reisner c->bit_offset = s; 4788b411b363SPhilipp Reisner bm_xfer_ctx_bit_to_word_offset(c); 4789b411b363SPhilipp 
Reisner 47902c46407dSAndreas Gruenbacher return (s != c->bm_bits); 4791b411b363SPhilipp Reisner } 4792b411b363SPhilipp Reisner 47932c46407dSAndreas Gruenbacher /** 47942c46407dSAndreas Gruenbacher * decode_bitmap_c 47952c46407dSAndreas Gruenbacher * 47962c46407dSAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error 47972c46407dSAndreas Gruenbacher * code upon failure. 47982c46407dSAndreas Gruenbacher */ 47992c46407dSAndreas Gruenbacher static int 480069a22773SAndreas Gruenbacher decode_bitmap_c(struct drbd_peer_device *peer_device, 4801b411b363SPhilipp Reisner struct p_compressed_bm *p, 4802c6d25cfeSPhilipp Reisner struct bm_xfer_ctx *c, 4803c6d25cfeSPhilipp Reisner unsigned int len) 4804b411b363SPhilipp Reisner { 4805a02d1240SAndreas Gruenbacher if (dcbp_get_code(p) == RLE_VLI_Bits) 480669a22773SAndreas Gruenbacher return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p)); 4807b411b363SPhilipp Reisner 4808b411b363SPhilipp Reisner /* other variants had been implemented for evaluation, 4809b411b363SPhilipp Reisner * but have been dropped as this one turned out to be "best" 4810b411b363SPhilipp Reisner * during all our tests. 
*/ 4811b411b363SPhilipp Reisner 481269a22773SAndreas Gruenbacher drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding); 481369a22773SAndreas Gruenbacher conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD); 48142c46407dSAndreas Gruenbacher return -EIO; 4815b411b363SPhilipp Reisner } 4816b411b363SPhilipp Reisner 4817b30ab791SAndreas Gruenbacher void INFO_bm_xfer_stats(struct drbd_device *device, 4818b411b363SPhilipp Reisner const char *direction, struct bm_xfer_ctx *c) 4819b411b363SPhilipp Reisner { 4820b411b363SPhilipp Reisner /* what would it take to transfer it "plaintext" */ 4821a6b32bc3SAndreas Gruenbacher unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); 482250d0b1adSAndreas Gruenbacher unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; 482350d0b1adSAndreas Gruenbacher unsigned int plain = 482450d0b1adSAndreas Gruenbacher header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + 482550d0b1adSAndreas Gruenbacher c->bm_words * sizeof(unsigned long); 482650d0b1adSAndreas Gruenbacher unsigned int total = c->bytes[0] + c->bytes[1]; 482750d0b1adSAndreas Gruenbacher unsigned int r; 4828b411b363SPhilipp Reisner 4829b411b363SPhilipp Reisner /* total can not be zero. but just in case: */ 4830b411b363SPhilipp Reisner if (total == 0) 4831b411b363SPhilipp Reisner return; 4832b411b363SPhilipp Reisner 4833b411b363SPhilipp Reisner /* don't report if not compressed */ 4834b411b363SPhilipp Reisner if (total >= plain) 4835b411b363SPhilipp Reisner return; 4836b411b363SPhilipp Reisner 4837b411b363SPhilipp Reisner /* total < plain. check for overflow, still */ 4838b411b363SPhilipp Reisner r = (total > UINT_MAX/1000) ? 
(total / (plain/1000)) 4839b411b363SPhilipp Reisner : (1000 * total / plain); 4840b411b363SPhilipp Reisner 4841b411b363SPhilipp Reisner if (r > 1000) 4842b411b363SPhilipp Reisner r = 1000; 4843b411b363SPhilipp Reisner 4844b411b363SPhilipp Reisner r = 1000 - r; 4845d0180171SAndreas Gruenbacher drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " 4846b411b363SPhilipp Reisner "total %u; compression: %u.%u%%\n", 4847b411b363SPhilipp Reisner direction, 4848b411b363SPhilipp Reisner c->bytes[1], c->packets[1], 4849b411b363SPhilipp Reisner c->bytes[0], c->packets[0], 4850b411b363SPhilipp Reisner total, r/10, r % 10); 4851b411b363SPhilipp Reisner } 4852b411b363SPhilipp Reisner 4853b411b363SPhilipp Reisner /* Since we are processing the bitfield from lower addresses to higher, 4854b411b363SPhilipp Reisner it does not matter if the process it in 32 bit chunks or 64 bit 4855b411b363SPhilipp Reisner chunks as long as it is little endian. (Understand it as byte stream, 4856b411b363SPhilipp Reisner beginning with the lowest byte...) If we would use big endian 4857b411b363SPhilipp Reisner we would need to process it from the highest address to the lowest, 4858b411b363SPhilipp Reisner in order to be agnostic to the 32 vs 64 bits issue. 4859b411b363SPhilipp Reisner 4860b411b363SPhilipp Reisner returns 0 on failure, 1 if we successfully received it. 
*/ 4861bde89a9eSAndreas Gruenbacher static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi) 4862b411b363SPhilipp Reisner { 48639f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4864b30ab791SAndreas Gruenbacher struct drbd_device *device; 4865b411b363SPhilipp Reisner struct bm_xfer_ctx c; 48662c46407dSAndreas Gruenbacher int err; 48674a76b161SAndreas Gruenbacher 48689f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 48699f4fe9adSAndreas Gruenbacher if (!peer_device) 48704a76b161SAndreas Gruenbacher return -EIO; 48719f4fe9adSAndreas Gruenbacher device = peer_device->device; 4872b411b363SPhilipp Reisner 4873b30ab791SAndreas Gruenbacher drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED); 487420ceb2b2SLars Ellenberg /* you are supposed to send additional out-of-sync information 487520ceb2b2SLars Ellenberg * if you actually set bits during this phase */ 4876b411b363SPhilipp Reisner 4877b411b363SPhilipp Reisner c = (struct bm_xfer_ctx) { 4878b30ab791SAndreas Gruenbacher .bm_bits = drbd_bm_bits(device), 4879b30ab791SAndreas Gruenbacher .bm_words = drbd_bm_words(device), 4880b411b363SPhilipp Reisner }; 4881b411b363SPhilipp Reisner 48822c46407dSAndreas Gruenbacher for(;;) { 4883e658983aSAndreas Gruenbacher if (pi->cmd == P_BITMAP) 488469a22773SAndreas Gruenbacher err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c); 4885e658983aSAndreas Gruenbacher else if (pi->cmd == P_COMPRESSED_BITMAP) { 4886b411b363SPhilipp Reisner /* MAYBE: sanity check that we speak proto >= 90, 4887b411b363SPhilipp Reisner * and the feature is enabled! 
*/ 4888e658983aSAndreas Gruenbacher struct p_compressed_bm *p = pi->data; 4889b411b363SPhilipp Reisner 4890bde89a9eSAndreas Gruenbacher if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) { 4891d0180171SAndreas Gruenbacher drbd_err(device, "ReportCBitmap packet too large\n"); 489282bc0194SAndreas Gruenbacher err = -EIO; 4893b411b363SPhilipp Reisner goto out; 4894b411b363SPhilipp Reisner } 4895e658983aSAndreas Gruenbacher if (pi->size <= sizeof(*p)) { 4896d0180171SAndreas Gruenbacher drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size); 489782bc0194SAndreas Gruenbacher err = -EIO; 489878fcbdaeSAndreas Gruenbacher goto out; 4899b411b363SPhilipp Reisner } 49009f4fe9adSAndreas Gruenbacher err = drbd_recv_all(peer_device->connection, p, pi->size); 4901e658983aSAndreas Gruenbacher if (err) 4902e658983aSAndreas Gruenbacher goto out; 490369a22773SAndreas Gruenbacher err = decode_bitmap_c(peer_device, p, &c, pi->size); 4904b411b363SPhilipp Reisner } else { 4905d0180171SAndreas Gruenbacher drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); 490682bc0194SAndreas Gruenbacher err = -EIO; 4907b411b363SPhilipp Reisner goto out; 4908b411b363SPhilipp Reisner } 4909b411b363SPhilipp Reisner 4910e2857216SAndreas Gruenbacher c.packets[pi->cmd == P_BITMAP]++; 4911bde89a9eSAndreas Gruenbacher c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size; 4912b411b363SPhilipp Reisner 49132c46407dSAndreas Gruenbacher if (err <= 0) { 49142c46407dSAndreas Gruenbacher if (err < 0) 49152c46407dSAndreas Gruenbacher goto out; 4916b411b363SPhilipp Reisner break; 49172c46407dSAndreas Gruenbacher } 49189f4fe9adSAndreas Gruenbacher err = drbd_recv_header(peer_device->connection, pi); 491982bc0194SAndreas Gruenbacher if (err) 4920b411b363SPhilipp Reisner goto out; 49212c46407dSAndreas Gruenbacher } 4922b411b363SPhilipp Reisner 4923b30ab791SAndreas Gruenbacher INFO_bm_xfer_stats(device, "receive", &c); 
4924b411b363SPhilipp Reisner 4925b30ab791SAndreas Gruenbacher if (device->state.conn == C_WF_BITMAP_T) { 4926de1f8e4aSAndreas Gruenbacher enum drbd_state_rv rv; 4927de1f8e4aSAndreas Gruenbacher 4928b30ab791SAndreas Gruenbacher err = drbd_send_bitmap(device); 492982bc0194SAndreas Gruenbacher if (err) 4930b411b363SPhilipp Reisner goto out; 4931b411b363SPhilipp Reisner /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ 4932b30ab791SAndreas Gruenbacher rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); 49330b0ba1efSAndreas Gruenbacher D_ASSERT(device, rv == SS_SUCCESS); 4934b30ab791SAndreas Gruenbacher } else if (device->state.conn != C_WF_BITMAP_S) { 4935b411b363SPhilipp Reisner /* admin may have requested C_DISCONNECTING, 4936b411b363SPhilipp Reisner * other threads may have noticed network errors */ 4937d0180171SAndreas Gruenbacher drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n", 4938b30ab791SAndreas Gruenbacher drbd_conn_str(device->state.conn)); 4939b411b363SPhilipp Reisner } 494082bc0194SAndreas Gruenbacher err = 0; 4941b411b363SPhilipp Reisner 4942b411b363SPhilipp Reisner out: 4943b30ab791SAndreas Gruenbacher drbd_bm_unlock(device); 4944b30ab791SAndreas Gruenbacher if (!err && device->state.conn == C_WF_BITMAP_S) 4945b30ab791SAndreas Gruenbacher drbd_start_resync(device, C_SYNC_SOURCE); 494682bc0194SAndreas Gruenbacher return err; 4947b411b363SPhilipp Reisner } 4948b411b363SPhilipp Reisner 4949bde89a9eSAndreas Gruenbacher static int receive_skip(struct drbd_connection *connection, struct packet_info *pi) 4950b411b363SPhilipp Reisner { 49511ec861ebSAndreas Gruenbacher drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n", 4952e2857216SAndreas Gruenbacher pi->cmd, pi->size); 4953b411b363SPhilipp Reisner 4954bde89a9eSAndreas Gruenbacher return ignore_remaining_packet(connection, pi); 4955b411b363SPhilipp Reisner } 4956b411b363SPhilipp Reisner 4957bde89a9eSAndreas Gruenbacher static int 
receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi) 4958b411b363SPhilipp Reisner { 4959b411b363SPhilipp Reisner /* Make sure we've acked all the TCP data associated 4960b411b363SPhilipp Reisner * with the data requests being unplugged */ 4961bde89a9eSAndreas Gruenbacher drbd_tcp_quickack(connection->data.socket); 4962b411b363SPhilipp Reisner 496382bc0194SAndreas Gruenbacher return 0; 4964b411b363SPhilipp Reisner } 4965b411b363SPhilipp Reisner 4966bde89a9eSAndreas Gruenbacher static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi) 496773a01a18SPhilipp Reisner { 49689f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 4969b30ab791SAndreas Gruenbacher struct drbd_device *device; 4970e658983aSAndreas Gruenbacher struct p_block_desc *p = pi->data; 49714a76b161SAndreas Gruenbacher 49729f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 49739f4fe9adSAndreas Gruenbacher if (!peer_device) 49744a76b161SAndreas Gruenbacher return -EIO; 49759f4fe9adSAndreas Gruenbacher device = peer_device->device; 497673a01a18SPhilipp Reisner 4977b30ab791SAndreas Gruenbacher switch (device->state.conn) { 4978f735e363SLars Ellenberg case C_WF_SYNC_UUID: 4979f735e363SLars Ellenberg case C_WF_BITMAP_T: 4980f735e363SLars Ellenberg case C_BEHIND: 4981f735e363SLars Ellenberg break; 4982f735e363SLars Ellenberg default: 4983d0180171SAndreas Gruenbacher drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", 4984b30ab791SAndreas Gruenbacher drbd_conn_str(device->state.conn)); 4985f735e363SLars Ellenberg } 4986f735e363SLars Ellenberg 4987b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); 498873a01a18SPhilipp Reisner 498982bc0194SAndreas Gruenbacher return 0; 499073a01a18SPhilipp Reisner } 499173a01a18SPhilipp Reisner 4992700ca8c0SPhilipp Reisner static int receive_rs_deallocated(struct drbd_connection 
*connection, struct packet_info *pi) 4993700ca8c0SPhilipp Reisner { 4994700ca8c0SPhilipp Reisner struct drbd_peer_device *peer_device; 4995700ca8c0SPhilipp Reisner struct p_block_desc *p = pi->data; 4996700ca8c0SPhilipp Reisner struct drbd_device *device; 4997700ca8c0SPhilipp Reisner sector_t sector; 4998700ca8c0SPhilipp Reisner int size, err = 0; 4999700ca8c0SPhilipp Reisner 5000700ca8c0SPhilipp Reisner peer_device = conn_peer_device(connection, pi->vnr); 5001700ca8c0SPhilipp Reisner if (!peer_device) 5002700ca8c0SPhilipp Reisner return -EIO; 5003700ca8c0SPhilipp Reisner device = peer_device->device; 5004700ca8c0SPhilipp Reisner 5005700ca8c0SPhilipp Reisner sector = be64_to_cpu(p->sector); 5006700ca8c0SPhilipp Reisner size = be32_to_cpu(p->blksize); 5007700ca8c0SPhilipp Reisner 5008700ca8c0SPhilipp Reisner dec_rs_pending(device); 5009700ca8c0SPhilipp Reisner 5010700ca8c0SPhilipp Reisner if (get_ldev(device)) { 5011700ca8c0SPhilipp Reisner struct drbd_peer_request *peer_req; 501245c21793SChristoph Hellwig const int op = REQ_OP_WRITE_ZEROES; 5013700ca8c0SPhilipp Reisner 5014700ca8c0SPhilipp Reisner peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, 50159104d31aSLars Ellenberg size, 0, GFP_NOIO); 5016700ca8c0SPhilipp Reisner if (!peer_req) { 5017700ca8c0SPhilipp Reisner put_ldev(device); 5018700ca8c0SPhilipp Reisner return -ENOMEM; 5019700ca8c0SPhilipp Reisner } 5020700ca8c0SPhilipp Reisner 5021700ca8c0SPhilipp Reisner peer_req->w.cb = e_end_resync_block; 5022700ca8c0SPhilipp Reisner peer_req->submit_jif = jiffies; 5023f31e583aSLars Ellenberg peer_req->flags |= EE_TRIM; 5024700ca8c0SPhilipp Reisner 5025700ca8c0SPhilipp Reisner spin_lock_irq(&device->resource->req_lock); 5026700ca8c0SPhilipp Reisner list_add_tail(&peer_req->w.list, &device->sync_ee); 5027700ca8c0SPhilipp Reisner spin_unlock_irq(&device->resource->req_lock); 5028700ca8c0SPhilipp Reisner 5029700ca8c0SPhilipp Reisner atomic_add(pi->size >> 9, &device->rs_sect_ev); 5030700ca8c0SPhilipp Reisner 
err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR); 5031700ca8c0SPhilipp Reisner 5032700ca8c0SPhilipp Reisner if (err) { 5033700ca8c0SPhilipp Reisner spin_lock_irq(&device->resource->req_lock); 5034700ca8c0SPhilipp Reisner list_del(&peer_req->w.list); 5035700ca8c0SPhilipp Reisner spin_unlock_irq(&device->resource->req_lock); 5036700ca8c0SPhilipp Reisner 5037700ca8c0SPhilipp Reisner drbd_free_peer_req(device, peer_req); 5038700ca8c0SPhilipp Reisner put_ldev(device); 5039700ca8c0SPhilipp Reisner err = 0; 5040700ca8c0SPhilipp Reisner goto fail; 5041700ca8c0SPhilipp Reisner } 5042700ca8c0SPhilipp Reisner 5043700ca8c0SPhilipp Reisner inc_unacked(device); 5044700ca8c0SPhilipp Reisner 5045700ca8c0SPhilipp Reisner /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(), 5046700ca8c0SPhilipp Reisner as well as drbd_rs_complete_io() */ 5047700ca8c0SPhilipp Reisner } else { 5048700ca8c0SPhilipp Reisner fail: 5049700ca8c0SPhilipp Reisner drbd_rs_complete_io(device, sector); 5050700ca8c0SPhilipp Reisner drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); 5051700ca8c0SPhilipp Reisner } 5052700ca8c0SPhilipp Reisner 5053700ca8c0SPhilipp Reisner atomic_add(size >> 9, &device->rs_sect_in); 5054700ca8c0SPhilipp Reisner 5055700ca8c0SPhilipp Reisner return err; 5056700ca8c0SPhilipp Reisner } 5057700ca8c0SPhilipp Reisner 505802918be2SPhilipp Reisner struct data_cmd { 505902918be2SPhilipp Reisner int expect_payload; 50609104d31aSLars Ellenberg unsigned int pkt_size; 5061bde89a9eSAndreas Gruenbacher int (*fn)(struct drbd_connection *, struct packet_info *); 5062b411b363SPhilipp Reisner }; 5063b411b363SPhilipp Reisner 506402918be2SPhilipp Reisner static struct data_cmd drbd_cmd_handler[] = { 506502918be2SPhilipp Reisner [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, 506602918be2SPhilipp Reisner [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, 506702918be2SPhilipp Reisner [P_RS_DATA_REPLY] = { 1, sizeof(struct 
p_data), receive_RSDataReply } , 506802918be2SPhilipp Reisner [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , 5069e658983aSAndreas Gruenbacher [P_BITMAP] = { 1, 0, receive_bitmap } , 5070e658983aSAndreas Gruenbacher [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } , 5071e658983aSAndreas Gruenbacher [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote }, 507202918be2SPhilipp Reisner [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 507302918be2SPhilipp Reisner [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 5074e658983aSAndreas Gruenbacher [P_SYNC_PARAM] = { 1, 0, receive_SyncParam }, 5075e658983aSAndreas Gruenbacher [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam }, 507602918be2SPhilipp Reisner [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, 507702918be2SPhilipp Reisner [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, 507802918be2SPhilipp Reisner [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, 507902918be2SPhilipp Reisner [P_STATE] = { 0, sizeof(struct p_state), receive_state }, 508002918be2SPhilipp Reisner [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, 508102918be2SPhilipp Reisner [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, 508202918be2SPhilipp Reisner [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 508302918be2SPhilipp Reisner [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 508402918be2SPhilipp Reisner [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, 5085700ca8c0SPhilipp Reisner [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest }, 508602918be2SPhilipp Reisner [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, 508773a01a18SPhilipp Reisner [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, 50884a76b161SAndreas Gruenbacher [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct 
p_req_state), receive_req_conn_state }, 5089036b17eaSPhilipp Reisner [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, 5090a0fb3c47SLars Ellenberg [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, 5091f31e583aSLars Ellenberg [P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data }, 5092700ca8c0SPhilipp Reisner [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, 50939104d31aSLars Ellenberg [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, 509402918be2SPhilipp Reisner }; 509502918be2SPhilipp Reisner 5096bde89a9eSAndreas Gruenbacher static void drbdd(struct drbd_connection *connection) 5097b411b363SPhilipp Reisner { 509877351055SPhilipp Reisner struct packet_info pi; 509902918be2SPhilipp Reisner size_t shs; /* sub header size */ 510082bc0194SAndreas Gruenbacher int err; 5101b411b363SPhilipp Reisner 5102bde89a9eSAndreas Gruenbacher while (get_t_state(&connection->receiver) == RUNNING) { 51039104d31aSLars Ellenberg struct data_cmd const *cmd; 5104deebe195SAndreas Gruenbacher 5105bde89a9eSAndreas Gruenbacher drbd_thread_current_set_cpu(&connection->receiver); 5106c51a0ef3SLars Ellenberg update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug); 5107c51a0ef3SLars Ellenberg if (drbd_recv_header_maybe_unplug(connection, &pi)) 510802918be2SPhilipp Reisner goto err_out; 510902918be2SPhilipp Reisner 5110deebe195SAndreas Gruenbacher cmd = &drbd_cmd_handler[pi.cmd]; 51114a76b161SAndreas Gruenbacher if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { 51121ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected data packet %s (0x%04x)", 51132fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd); 511402918be2SPhilipp Reisner goto err_out; 51150b33a916SLars Ellenberg } 5116b411b363SPhilipp Reisner 5117e658983aSAndreas Gruenbacher shs = cmd->pkt_size; 51189104d31aSLars Ellenberg if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME) 51199104d31aSLars Ellenberg shs += 
			/* continuation of drbdd(): optional o_qlim trailer is part of the
			 * static header size (shs) for this command */
			sizeof(struct o_qlim);
		/* Payload present although this command never carries one? */
		if (pi.size > shs && !cmd->expect_payload) {
			drbd_err(connection, "No payload expected %s l:%d\n",
				 cmdname(pi.cmd), pi.size);
			goto err_out;
		}
		/* Packet shorter than its own fixed header portion? */
		if (pi.size < shs) {
			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
				 cmdname(pi.cmd), (int)shs, pi.size);
			goto err_out;
		}

		if (shs) {
			/* Receive the fixed (sub-)header; pi.size is reduced to the
			 * remaining payload for the handler below. */
			update_receiver_timing_details(connection, drbd_recv_all_warn);
			err = drbd_recv_all_warn(connection, pi.data, shs);
			if (err)
				goto err_out;
			pi.size -= shs;
		}

		/* Dispatch to the per-command handler. */
		update_receiver_timing_details(connection, cmd->fn);
		err = cmd->fn(connection, &pi);
		if (err) {
			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
				 cmdname(pi.cmd), err, pi.size);
			goto err_out;
		}
	}
	return;

    err_out:
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}

/* Tear down a lost (or to-be-closed) connection: stop the ack receiver,
 * close the socket, clean up per-volume state, and transition the
 * connection state machine towards C_UNCONNECTED / C_STANDALONE. */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	/* Per-volume cleanup.  drbd_disconnected() may sleep, so we take a
	 * kref on the device and drop the RCU read lock around the call,
	 * re-acquiring it before continuing the idr walk. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we are (were) Primary and the peer's disk state is unknown,
	 * try to fence/outdate the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}

/* Per-volume part of connection teardown: wait for in-flight peer
 * requests, cancel resync bookkeeping, flush the sender workqueue,
 * and sanity-check that all ee lists drained.  Returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* Stop the resync timer and run the handler once more manually,
	 * so a pending resync request gets its regular completion path. */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn(&device->resync_timer);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}

/*
 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
 * we can agree on is stored in agreed_pro_version.
 *
 * feature flags and the reserved array should be enough room for future
 * enhancements of the handshake protocol, and possible plugins...
 *
 * for now, they are expected to be zero, but ignored.
 */
/* Send our P_CONNECTION_FEATURES packet: supported protocol version
 * range and feature flags.  Returns 0 on success, -EIO if the command
 * could not be prepared. */
static int drbd_send_features(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	struct p_connection_features *p;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	p->feature_flags = cpu_to_be32(PRO_FEATURES);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		/* presumably a peer that only reports protocol_min;
		 * treat the range as a single version (TODO confirm against
		 * older DRBD releases) */
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest protocol version and the intersection of
	 * feature flags both sides advertise. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}

#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub when HMAC support is not compiled in: always refuse.
 * NOTE(review): "was build" in the message below is a typo for "was
 * built"; the string is kept as-is here since it is runtime output. */
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/* Return value:
	1 - auth succeeded,
	0 - failed, try again (network error),
	-1 - auth failed, don't try again.
*/

static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Copy the shared secret out under RCU; net_conf may change. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* shash_desc has a variable-sized ctx trailer; allocate it to the
	 * size the HMAC transform requires. */
	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock =
*/ 5414b411b363SPhilipp Reisner char *response = NULL; 5415b411b363SPhilipp Reisner char *right_response = NULL; 5416b411b363SPhilipp Reisner char *peers_ch = NULL; 541744ed167dSPhilipp Reisner unsigned int key_len; 541844ed167dSPhilipp Reisner char secret[SHARED_SECRET_MAX]; /* 64 byte */ 5419b411b363SPhilipp Reisner unsigned int resp_size; 542077ce56e2SArnd Bergmann struct shash_desc *desc; 542177351055SPhilipp Reisner struct packet_info pi; 542244ed167dSPhilipp Reisner struct net_conf *nc; 542369bc7bc3SAndreas Gruenbacher int err, rv; 5424b411b363SPhilipp Reisner 54259f5bdc33SAndreas Gruenbacher /* FIXME: Put the challenge/response into the preallocated socket buffer. */ 54269f5bdc33SAndreas Gruenbacher 542744ed167dSPhilipp Reisner rcu_read_lock(); 5428bde89a9eSAndreas Gruenbacher nc = rcu_dereference(connection->net_conf); 542944ed167dSPhilipp Reisner key_len = strlen(nc->shared_secret); 543044ed167dSPhilipp Reisner memcpy(secret, nc->shared_secret, key_len); 543144ed167dSPhilipp Reisner rcu_read_unlock(); 543244ed167dSPhilipp Reisner 543377ce56e2SArnd Bergmann desc = kmalloc(sizeof(struct shash_desc) + 543477ce56e2SArnd Bergmann crypto_shash_descsize(connection->cram_hmac_tfm), 543577ce56e2SArnd Bergmann GFP_KERNEL); 543677ce56e2SArnd Bergmann if (!desc) { 543777ce56e2SArnd Bergmann rv = -1; 543877ce56e2SArnd Bergmann goto fail; 543977ce56e2SArnd Bergmann } 54409534d671SHerbert Xu desc->tfm = connection->cram_hmac_tfm; 5441b411b363SPhilipp Reisner 54429534d671SHerbert Xu rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); 5443b411b363SPhilipp Reisner if (rv) { 54449534d671SHerbert Xu drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv); 5445b10d96cbSJohannes Thoma rv = -1; 5446b411b363SPhilipp Reisner goto fail; 5447b411b363SPhilipp Reisner } 5448b411b363SPhilipp Reisner 5449b411b363SPhilipp Reisner get_random_bytes(my_challenge, CHALLENGE_LEN); 5450b411b363SPhilipp Reisner 5451bde89a9eSAndreas Gruenbacher sock = 
&connection->data; 5452bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) { 54539f5bdc33SAndreas Gruenbacher rv = 0; 54549f5bdc33SAndreas Gruenbacher goto fail; 54559f5bdc33SAndreas Gruenbacher } 5456bde89a9eSAndreas Gruenbacher rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0, 54579f5bdc33SAndreas Gruenbacher my_challenge, CHALLENGE_LEN); 5458b411b363SPhilipp Reisner if (!rv) 5459b411b363SPhilipp Reisner goto fail; 5460b411b363SPhilipp Reisner 5461bde89a9eSAndreas Gruenbacher err = drbd_recv_header(connection, &pi); 546269bc7bc3SAndreas Gruenbacher if (err) { 5463b411b363SPhilipp Reisner rv = 0; 5464b411b363SPhilipp Reisner goto fail; 5465b411b363SPhilipp Reisner } 5466b411b363SPhilipp Reisner 546777351055SPhilipp Reisner if (pi.cmd != P_AUTH_CHALLENGE) { 54681ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n", 546977351055SPhilipp Reisner cmdname(pi.cmd), pi.cmd); 54709049ccd4SLars Ellenberg rv = -1; 5471b411b363SPhilipp Reisner goto fail; 5472b411b363SPhilipp Reisner } 5473b411b363SPhilipp Reisner 547477351055SPhilipp Reisner if (pi.size > CHALLENGE_LEN * 2) { 54751ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthChallenge payload too big.\n"); 5476b10d96cbSJohannes Thoma rv = -1; 5477b411b363SPhilipp Reisner goto fail; 5478b411b363SPhilipp Reisner } 5479b411b363SPhilipp Reisner 548067cca286SPhilipp Reisner if (pi.size < CHALLENGE_LEN) { 548167cca286SPhilipp Reisner drbd_err(connection, "AuthChallenge payload too small.\n"); 548267cca286SPhilipp Reisner rv = -1; 548367cca286SPhilipp Reisner goto fail; 548467cca286SPhilipp Reisner } 548567cca286SPhilipp Reisner 548677351055SPhilipp Reisner peers_ch = kmalloc(pi.size, GFP_NOIO); 5487b411b363SPhilipp Reisner if (peers_ch == NULL) { 54881ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of peers_ch failed\n"); 5489b10d96cbSJohannes Thoma rv = -1; 5490b411b363SPhilipp Reisner goto fail; 
5491b411b363SPhilipp Reisner } 5492b411b363SPhilipp Reisner 5493bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, peers_ch, pi.size); 5494a5c31904SAndreas Gruenbacher if (err) { 5495b411b363SPhilipp Reisner rv = 0; 5496b411b363SPhilipp Reisner goto fail; 5497b411b363SPhilipp Reisner } 5498b411b363SPhilipp Reisner 549967cca286SPhilipp Reisner if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) { 550067cca286SPhilipp Reisner drbd_err(connection, "Peer presented the same challenge!\n"); 550167cca286SPhilipp Reisner rv = -1; 550267cca286SPhilipp Reisner goto fail; 550367cca286SPhilipp Reisner } 550467cca286SPhilipp Reisner 55059534d671SHerbert Xu resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm); 5506b411b363SPhilipp Reisner response = kmalloc(resp_size, GFP_NOIO); 5507b411b363SPhilipp Reisner if (response == NULL) { 55081ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of response failed\n"); 5509b10d96cbSJohannes Thoma rv = -1; 5510b411b363SPhilipp Reisner goto fail; 5511b411b363SPhilipp Reisner } 5512b411b363SPhilipp Reisner 55139534d671SHerbert Xu rv = crypto_shash_digest(desc, peers_ch, pi.size, response); 5514b411b363SPhilipp Reisner if (rv) { 55151ec861ebSAndreas Gruenbacher drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 5516b10d96cbSJohannes Thoma rv = -1; 5517b411b363SPhilipp Reisner goto fail; 5518b411b363SPhilipp Reisner } 5519b411b363SPhilipp Reisner 5520bde89a9eSAndreas Gruenbacher if (!conn_prepare_command(connection, sock)) { 55219f5bdc33SAndreas Gruenbacher rv = 0; 55229f5bdc33SAndreas Gruenbacher goto fail; 55239f5bdc33SAndreas Gruenbacher } 5524bde89a9eSAndreas Gruenbacher rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0, 55259f5bdc33SAndreas Gruenbacher response, resp_size); 5526b411b363SPhilipp Reisner if (!rv) 5527b411b363SPhilipp Reisner goto fail; 5528b411b363SPhilipp Reisner 5529bde89a9eSAndreas Gruenbacher err = drbd_recv_header(connection, &pi); 553069bc7bc3SAndreas 
Gruenbacher if (err) { 5531b411b363SPhilipp Reisner rv = 0; 5532b411b363SPhilipp Reisner goto fail; 5533b411b363SPhilipp Reisner } 5534b411b363SPhilipp Reisner 553577351055SPhilipp Reisner if (pi.cmd != P_AUTH_RESPONSE) { 55361ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n", 553777351055SPhilipp Reisner cmdname(pi.cmd), pi.cmd); 5538b411b363SPhilipp Reisner rv = 0; 5539b411b363SPhilipp Reisner goto fail; 5540b411b363SPhilipp Reisner } 5541b411b363SPhilipp Reisner 554277351055SPhilipp Reisner if (pi.size != resp_size) { 55431ec861ebSAndreas Gruenbacher drbd_err(connection, "expected AuthResponse payload of wrong size\n"); 5544b411b363SPhilipp Reisner rv = 0; 5545b411b363SPhilipp Reisner goto fail; 5546b411b363SPhilipp Reisner } 5547b411b363SPhilipp Reisner 5548bde89a9eSAndreas Gruenbacher err = drbd_recv_all_warn(connection, response , resp_size); 5549a5c31904SAndreas Gruenbacher if (err) { 5550b411b363SPhilipp Reisner rv = 0; 5551b411b363SPhilipp Reisner goto fail; 5552b411b363SPhilipp Reisner } 5553b411b363SPhilipp Reisner 5554b411b363SPhilipp Reisner right_response = kmalloc(resp_size, GFP_NOIO); 55552d1ee87dSJulia Lawall if (right_response == NULL) { 55561ec861ebSAndreas Gruenbacher drbd_err(connection, "kmalloc of right_response failed\n"); 5557b10d96cbSJohannes Thoma rv = -1; 5558b411b363SPhilipp Reisner goto fail; 5559b411b363SPhilipp Reisner } 5560b411b363SPhilipp Reisner 55619534d671SHerbert Xu rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN, 55629534d671SHerbert Xu right_response); 5563b411b363SPhilipp Reisner if (rv) { 55641ec861ebSAndreas Gruenbacher drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv); 5565b10d96cbSJohannes Thoma rv = -1; 5566b411b363SPhilipp Reisner goto fail; 5567b411b363SPhilipp Reisner } 5568b411b363SPhilipp Reisner 5569b411b363SPhilipp Reisner rv = !memcmp(response, right_response, resp_size); 5570b411b363SPhilipp Reisner 5571b411b363SPhilipp Reisner 
if (rv) 55721ec861ebSAndreas Gruenbacher drbd_info(connection, "Peer authenticated using %d bytes HMAC\n", 557344ed167dSPhilipp Reisner resp_size); 5574b10d96cbSJohannes Thoma else 5575b10d96cbSJohannes Thoma rv = -1; 5576b411b363SPhilipp Reisner 5577b411b363SPhilipp Reisner fail: 5578b411b363SPhilipp Reisner kfree(peers_ch); 5579b411b363SPhilipp Reisner kfree(response); 5580b411b363SPhilipp Reisner kfree(right_response); 558177ce56e2SArnd Bergmann if (desc) { 55829534d671SHerbert Xu shash_desc_zero(desc); 558377ce56e2SArnd Bergmann kfree(desc); 558477ce56e2SArnd Bergmann } 5585b411b363SPhilipp Reisner 5586b411b363SPhilipp Reisner return rv; 5587b411b363SPhilipp Reisner } 5588b411b363SPhilipp Reisner #endif 5589b411b363SPhilipp Reisner 55908fe60551SAndreas Gruenbacher int drbd_receiver(struct drbd_thread *thi) 5591b411b363SPhilipp Reisner { 5592bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 5593b411b363SPhilipp Reisner int h; 5594b411b363SPhilipp Reisner 55951ec861ebSAndreas Gruenbacher drbd_info(connection, "receiver (re)started\n"); 5596b411b363SPhilipp Reisner 5597b411b363SPhilipp Reisner do { 5598bde89a9eSAndreas Gruenbacher h = conn_connect(connection); 5599b411b363SPhilipp Reisner if (h == 0) { 5600bde89a9eSAndreas Gruenbacher conn_disconnect(connection); 560120ee6390SPhilipp Reisner schedule_timeout_interruptible(HZ); 5602b411b363SPhilipp Reisner } 5603b411b363SPhilipp Reisner if (h == -1) { 56041ec861ebSAndreas Gruenbacher drbd_warn(connection, "Discarding network configuration.\n"); 5605bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 5606b411b363SPhilipp Reisner } 5607b411b363SPhilipp Reisner } while (h == 0); 5608b411b363SPhilipp Reisner 5609c51a0ef3SLars Ellenberg if (h > 0) { 5610c51a0ef3SLars Ellenberg blk_start_plug(&connection->receiver_plug); 5611bde89a9eSAndreas Gruenbacher drbdd(connection); 5612c51a0ef3SLars Ellenberg blk_finish_plug(&connection->receiver_plug); 
	}

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

/* Handle a connection-wide state-change reply from the peer: record
 * success/failure in the connection flags and wake the waiter. */
static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

/* Handle a per-device state-change reply.  For pre-protocol-100 peers a
 * pending connection-wide request is answered on this path instead, so
 * delegate to got_conn_RqSReply() in that case. */
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

/* Answer a keep-alive ping immediately. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}

/* Peer answered our ping: restore the idle receive timeout and wake
 * whoever is waiting on ping_wait. */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

/* Peer confirmed a checksum-based resync block is already in sync:
 * mark it in sync locally and update resync accounting. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

/* Look up the request identified by (id, sector) in @root under the
 * request lock, apply state transition @what via __req_mod(), and
 * complete the master bio outside the lock if one resulted.
 * Returns 0 on success, -EIO if the request was not found (and
 * @missing_ok did not allow that). */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* complete_master_bio() must run without the req_lock held */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

/* Handle the various positive write acknowledgements from the peer and
 * advance the corresponding request's state machine. */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* ID_SYNCER acks belong to resync traffic, not to a tracked
	 * application request. */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	/* Map the ack packet type onto the request state-machine event. */
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

/* Handle a negative write acknowledgement from the peer.
 * (definition continues past the end of this chunk) */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;
5791b411b363SPhilipp Reisner 57929f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 57939f4fe9adSAndreas Gruenbacher if (!peer_device) 57942735a594SAndreas Gruenbacher return -EIO; 57959f4fe9adSAndreas Gruenbacher device = peer_device->device; 5796b411b363SPhilipp Reisner 579769a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5798b411b363SPhilipp Reisner 5799579b57edSAndreas Gruenbacher if (p->block_id == ID_SYNCER) { 5800b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5801b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 58022735a594SAndreas Gruenbacher return 0; 5803b411b363SPhilipp Reisner } 58042deb8336SPhilipp Reisner 5805b30ab791SAndreas Gruenbacher err = validate_req_change_req_state(device, p->block_id, sector, 5806b30ab791SAndreas Gruenbacher &device->write_requests, __func__, 5807303d1448SPhilipp Reisner NEG_ACKED, true); 580885997675SAndreas Gruenbacher if (err) { 58092deb8336SPhilipp Reisner /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 58102deb8336SPhilipp Reisner The master bio might already be completed, therefore the 5811c3afd8f5SAndreas Gruenbacher request is no longer in the collision hash. */ 58122deb8336SPhilipp Reisner /* In Protocol B we might already have got a P_RECV_ACK 58132deb8336SPhilipp Reisner but then get a P_NEG_ACK afterwards. 
*/ 5814b30ab791SAndreas Gruenbacher drbd_set_out_of_sync(device, sector, size); 58152deb8336SPhilipp Reisner } 58162735a594SAndreas Gruenbacher return 0; 5817b411b363SPhilipp Reisner } 5818b411b363SPhilipp Reisner 5819bde89a9eSAndreas Gruenbacher static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi) 5820b411b363SPhilipp Reisner { 58219f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5822b30ab791SAndreas Gruenbacher struct drbd_device *device; 5823e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 5824b411b363SPhilipp Reisner sector_t sector = be64_to_cpu(p->sector); 5825b411b363SPhilipp Reisner 58269f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 58279f4fe9adSAndreas Gruenbacher if (!peer_device) 58282735a594SAndreas Gruenbacher return -EIO; 58299f4fe9adSAndreas Gruenbacher device = peer_device->device; 58301952e916SAndreas Gruenbacher 583169a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 58327be8da07SAndreas Gruenbacher 5833d0180171SAndreas Gruenbacher drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", 5834b411b363SPhilipp Reisner (unsigned long long)sector, be32_to_cpu(p->blksize)); 5835b411b363SPhilipp Reisner 5836b30ab791SAndreas Gruenbacher return validate_req_change_req_state(device, p->block_id, sector, 5837b30ab791SAndreas Gruenbacher &device->read_requests, __func__, 58388554df1cSAndreas Gruenbacher NEG_ACKED, false); 5839b411b363SPhilipp Reisner } 5840b411b363SPhilipp Reisner 5841bde89a9eSAndreas Gruenbacher static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi) 5842b411b363SPhilipp Reisner { 58439f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5844b30ab791SAndreas Gruenbacher struct drbd_device *device; 5845b411b363SPhilipp Reisner sector_t sector; 5846b411b363SPhilipp Reisner int size; 5847e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 58481952e916SAndreas 
Gruenbacher 58499f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 58509f4fe9adSAndreas Gruenbacher if (!peer_device) 58512735a594SAndreas Gruenbacher return -EIO; 58529f4fe9adSAndreas Gruenbacher device = peer_device->device; 5853b411b363SPhilipp Reisner 5854b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5855b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 5856b411b363SPhilipp Reisner 585769a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5858b411b363SPhilipp Reisner 5859b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5860b411b363SPhilipp Reisner 5861b30ab791SAndreas Gruenbacher if (get_ldev_if_state(device, D_FAILED)) { 5862b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5863e05e1e59SAndreas Gruenbacher switch (pi->cmd) { 5864d612d309SPhilipp Reisner case P_NEG_RS_DREPLY: 5865b30ab791SAndreas Gruenbacher drbd_rs_failed_io(device, sector, size); 5866d612d309SPhilipp Reisner case P_RS_CANCEL: 5867d612d309SPhilipp Reisner break; 5868d612d309SPhilipp Reisner default: 58692735a594SAndreas Gruenbacher BUG(); 5870d612d309SPhilipp Reisner } 5871b30ab791SAndreas Gruenbacher put_ldev(device); 5872b411b363SPhilipp Reisner } 5873b411b363SPhilipp Reisner 58742735a594SAndreas Gruenbacher return 0; 5875b411b363SPhilipp Reisner } 5876b411b363SPhilipp Reisner 5877bde89a9eSAndreas Gruenbacher static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi) 5878b411b363SPhilipp Reisner { 5879e658983aSAndreas Gruenbacher struct p_barrier_ack *p = pi->data; 5880c06ece6bSAndreas Gruenbacher struct drbd_peer_device *peer_device; 58819ed57dcbSLars Ellenberg int vnr; 5882b411b363SPhilipp Reisner 5883bde89a9eSAndreas Gruenbacher tl_release(connection, p->barrier, be32_to_cpu(p->set_size)); 5884b411b363SPhilipp Reisner 58859ed57dcbSLars Ellenberg rcu_read_lock(); 5886c06ece6bSAndreas Gruenbacher idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 
5887c06ece6bSAndreas Gruenbacher struct drbd_device *device = peer_device->device; 5888c06ece6bSAndreas Gruenbacher 5889b30ab791SAndreas Gruenbacher if (device->state.conn == C_AHEAD && 5890b30ab791SAndreas Gruenbacher atomic_read(&device->ap_in_flight) == 0 && 5891b30ab791SAndreas Gruenbacher !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) { 5892b30ab791SAndreas Gruenbacher device->start_resync_timer.expires = jiffies + HZ; 5893b30ab791SAndreas Gruenbacher add_timer(&device->start_resync_timer); 5894c4752ef1SPhilipp Reisner } 58959ed57dcbSLars Ellenberg } 58969ed57dcbSLars Ellenberg rcu_read_unlock(); 5897c4752ef1SPhilipp Reisner 58982735a594SAndreas Gruenbacher return 0; 5899b411b363SPhilipp Reisner } 5900b411b363SPhilipp Reisner 5901bde89a9eSAndreas Gruenbacher static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi) 5902b411b363SPhilipp Reisner { 59039f4fe9adSAndreas Gruenbacher struct drbd_peer_device *peer_device; 5904b30ab791SAndreas Gruenbacher struct drbd_device *device; 5905e658983aSAndreas Gruenbacher struct p_block_ack *p = pi->data; 590684b8c06bSAndreas Gruenbacher struct drbd_device_work *dw; 5907b411b363SPhilipp Reisner sector_t sector; 5908b411b363SPhilipp Reisner int size; 5909b411b363SPhilipp Reisner 59109f4fe9adSAndreas Gruenbacher peer_device = conn_peer_device(connection, pi->vnr); 59119f4fe9adSAndreas Gruenbacher if (!peer_device) 59122735a594SAndreas Gruenbacher return -EIO; 59139f4fe9adSAndreas Gruenbacher device = peer_device->device; 59141952e916SAndreas Gruenbacher 5915b411b363SPhilipp Reisner sector = be64_to_cpu(p->sector); 5916b411b363SPhilipp Reisner size = be32_to_cpu(p->blksize); 5917b411b363SPhilipp Reisner 591869a22773SAndreas Gruenbacher update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); 5919b411b363SPhilipp Reisner 5920b411b363SPhilipp Reisner if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) 5921b30ab791SAndreas Gruenbacher drbd_ov_out_of_sync_found(device, sector, size); 
5922b411b363SPhilipp Reisner else 5923b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5924b411b363SPhilipp Reisner 5925b30ab791SAndreas Gruenbacher if (!get_ldev(device)) 59262735a594SAndreas Gruenbacher return 0; 59271d53f09eSLars Ellenberg 5928b30ab791SAndreas Gruenbacher drbd_rs_complete_io(device, sector); 5929b30ab791SAndreas Gruenbacher dec_rs_pending(device); 5930b411b363SPhilipp Reisner 5931b30ab791SAndreas Gruenbacher --device->ov_left; 5932ea5442afSLars Ellenberg 5933ea5442afSLars Ellenberg /* let's advance progress step marks only for every other megabyte */ 5934b30ab791SAndreas Gruenbacher if ((device->ov_left & 0x200) == 0x200) 5935b30ab791SAndreas Gruenbacher drbd_advance_rs_marks(device, device->ov_left); 5936ea5442afSLars Ellenberg 5937b30ab791SAndreas Gruenbacher if (device->ov_left == 0) { 593884b8c06bSAndreas Gruenbacher dw = kmalloc(sizeof(*dw), GFP_NOIO); 593984b8c06bSAndreas Gruenbacher if (dw) { 594084b8c06bSAndreas Gruenbacher dw->w.cb = w_ov_finished; 594184b8c06bSAndreas Gruenbacher dw->device = device; 594284b8c06bSAndreas Gruenbacher drbd_queue_work(&peer_device->connection->sender_work, &dw->w); 5943b411b363SPhilipp Reisner } else { 594484b8c06bSAndreas Gruenbacher drbd_err(device, "kmalloc(dw) failed."); 5945b30ab791SAndreas Gruenbacher ov_out_of_sync_print(device); 5946b30ab791SAndreas Gruenbacher drbd_resync_finished(device); 5947b411b363SPhilipp Reisner } 5948b411b363SPhilipp Reisner } 5949b30ab791SAndreas Gruenbacher put_ldev(device); 59502735a594SAndreas Gruenbacher return 0; 5951b411b363SPhilipp Reisner } 5952b411b363SPhilipp Reisner 5953bde89a9eSAndreas Gruenbacher static int got_skip(struct drbd_connection *connection, struct packet_info *pi) 59540ced55a3SPhilipp Reisner { 59552735a594SAndreas Gruenbacher return 0; 59560ced55a3SPhilipp Reisner } 59570ced55a3SPhilipp Reisner 5958668700b4SPhilipp Reisner struct meta_sock_cmd { 5959b411b363SPhilipp Reisner size_t pkt_size; 5960bde89a9eSAndreas Gruenbacher int 
(*fn)(struct drbd_connection *connection, struct packet_info *); 5961b411b363SPhilipp Reisner }; 5962b411b363SPhilipp Reisner 5963668700b4SPhilipp Reisner static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout) 5964668700b4SPhilipp Reisner { 5965668700b4SPhilipp Reisner long t; 5966668700b4SPhilipp Reisner struct net_conf *nc; 5967668700b4SPhilipp Reisner 5968668700b4SPhilipp Reisner rcu_read_lock(); 5969668700b4SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 5970668700b4SPhilipp Reisner t = ping_timeout ? nc->ping_timeo : nc->ping_int; 5971668700b4SPhilipp Reisner rcu_read_unlock(); 5972668700b4SPhilipp Reisner 5973668700b4SPhilipp Reisner t *= HZ; 5974668700b4SPhilipp Reisner if (ping_timeout) 5975668700b4SPhilipp Reisner t /= 10; 5976668700b4SPhilipp Reisner 5977668700b4SPhilipp Reisner connection->meta.socket->sk->sk_rcvtimeo = t; 5978668700b4SPhilipp Reisner } 5979668700b4SPhilipp Reisner 5980668700b4SPhilipp Reisner static void set_ping_timeout(struct drbd_connection *connection) 5981668700b4SPhilipp Reisner { 5982668700b4SPhilipp Reisner set_rcvtimeo(connection, 1); 5983668700b4SPhilipp Reisner } 5984668700b4SPhilipp Reisner 5985668700b4SPhilipp Reisner static void set_idle_timeout(struct drbd_connection *connection) 5986668700b4SPhilipp Reisner { 5987668700b4SPhilipp Reisner set_rcvtimeo(connection, 0); 5988668700b4SPhilipp Reisner } 5989668700b4SPhilipp Reisner 5990668700b4SPhilipp Reisner static struct meta_sock_cmd ack_receiver_tbl[] = { 5991e658983aSAndreas Gruenbacher [P_PING] = { 0, got_Ping }, 5992e658983aSAndreas Gruenbacher [P_PING_ACK] = { 0, got_PingAck }, 5993b411b363SPhilipp Reisner [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5994b411b363SPhilipp Reisner [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5995b411b363SPhilipp Reisner [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 5996d4dabbe2SLars Ellenberg [P_SUPERSEDED] = { sizeof(struct p_block_ack), 
got_BlockAck }, 5997b411b363SPhilipp Reisner [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, 5998b411b363SPhilipp Reisner [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, 5999b411b363SPhilipp Reisner [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, 6000b411b363SPhilipp Reisner [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, 6001b411b363SPhilipp Reisner [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 6002b411b363SPhilipp Reisner [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 6003b411b363SPhilipp Reisner [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 600402918be2SPhilipp Reisner [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, 6005d612d309SPhilipp Reisner [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, 60061952e916SAndreas Gruenbacher [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, 60071952e916SAndreas Gruenbacher [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, 6008b411b363SPhilipp Reisner }; 6009b411b363SPhilipp Reisner 60101c03e520SPhilipp Reisner int drbd_ack_receiver(struct drbd_thread *thi) 6011b411b363SPhilipp Reisner { 6012bde89a9eSAndreas Gruenbacher struct drbd_connection *connection = thi->connection; 6013668700b4SPhilipp Reisner struct meta_sock_cmd *cmd = NULL; 601477351055SPhilipp Reisner struct packet_info pi; 6015668700b4SPhilipp Reisner unsigned long pre_recv_jif; 6016257d0af6SPhilipp Reisner int rv; 6017bde89a9eSAndreas Gruenbacher void *buf = connection->meta.rbuf; 6018b411b363SPhilipp Reisner int received = 0; 6019bde89a9eSAndreas Gruenbacher unsigned int header_size = drbd_header_size(connection); 602052b061a4SAndreas Gruenbacher int expect = header_size; 602144ed167dSPhilipp Reisner bool ping_timeout_active = false; 60223990e04dSPhilipp Reisner struct sched_param param = { .sched_priority = 2 }; 6023b411b363SPhilipp Reisner 60243990e04dSPhilipp 
Reisner rv = sched_setscheduler(current, SCHED_RR, ¶m); 60253990e04dSPhilipp Reisner if (rv < 0) 6026668700b4SPhilipp Reisner drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv); 6027b411b363SPhilipp Reisner 6028e77a0a5cSAndreas Gruenbacher while (get_t_state(thi) == RUNNING) { 602980822284SPhilipp Reisner drbd_thread_current_set_cpu(thi); 603044ed167dSPhilipp Reisner 6031668700b4SPhilipp Reisner conn_reclaim_net_peer_reqs(connection); 603244ed167dSPhilipp Reisner 6033bde89a9eSAndreas Gruenbacher if (test_and_clear_bit(SEND_PING, &connection->flags)) { 6034bde89a9eSAndreas Gruenbacher if (drbd_send_ping(connection)) { 60351ec861ebSAndreas Gruenbacher drbd_err(connection, "drbd_send_ping has failed\n"); 6036841ce241SAndreas Gruenbacher goto reconnect; 6037841ce241SAndreas Gruenbacher } 6038668700b4SPhilipp Reisner set_ping_timeout(connection); 603944ed167dSPhilipp Reisner ping_timeout_active = true; 6040b411b363SPhilipp Reisner } 6041b411b363SPhilipp Reisner 6042668700b4SPhilipp Reisner pre_recv_jif = jiffies; 6043bde89a9eSAndreas Gruenbacher rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); 6044b411b363SPhilipp Reisner 6045b411b363SPhilipp Reisner /* Note: 6046b411b363SPhilipp Reisner * -EINTR (on meta) we got a signal 6047b411b363SPhilipp Reisner * -EAGAIN (on meta) rcvtimeo expired 6048b411b363SPhilipp Reisner * -ECONNRESET other side closed the connection 6049b411b363SPhilipp Reisner * -ERESTARTSYS (on data) we got a signal 6050b411b363SPhilipp Reisner * rv < 0 other than above: unexpected error! 
6051b411b363SPhilipp Reisner * rv == expected: full header or command 6052b411b363SPhilipp Reisner * rv < expected: "woken" by signal during receive 6053b411b363SPhilipp Reisner * rv == 0 : "connection shut down by peer" 6054b411b363SPhilipp Reisner */ 6055b411b363SPhilipp Reisner if (likely(rv > 0)) { 6056b411b363SPhilipp Reisner received += rv; 6057b411b363SPhilipp Reisner buf += rv; 6058b411b363SPhilipp Reisner } else if (rv == 0) { 6059bde89a9eSAndreas Gruenbacher if (test_bit(DISCONNECT_SENT, &connection->flags)) { 6060b66623e3SPhilipp Reisner long t; 6061b66623e3SPhilipp Reisner rcu_read_lock(); 6062bde89a9eSAndreas Gruenbacher t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10; 6063b66623e3SPhilipp Reisner rcu_read_unlock(); 6064b66623e3SPhilipp Reisner 6065bde89a9eSAndreas Gruenbacher t = wait_event_timeout(connection->ping_wait, 6066bde89a9eSAndreas Gruenbacher connection->cstate < C_WF_REPORT_PARAMS, 6067b66623e3SPhilipp Reisner t); 6068599377acSPhilipp Reisner if (t) 6069599377acSPhilipp Reisner break; 6070599377acSPhilipp Reisner } 60711ec861ebSAndreas Gruenbacher drbd_err(connection, "meta connection shut down by peer.\n"); 6072b411b363SPhilipp Reisner goto reconnect; 6073b411b363SPhilipp Reisner } else if (rv == -EAGAIN) { 6074cb6518cbSLars Ellenberg /* If the data socket received something meanwhile, 6075cb6518cbSLars Ellenberg * that is good enough: peer is still alive. 
*/ 6076668700b4SPhilipp Reisner if (time_after(connection->last_received, pre_recv_jif)) 6077cb6518cbSLars Ellenberg continue; 6078f36af18cSLars Ellenberg if (ping_timeout_active) { 60791ec861ebSAndreas Gruenbacher drbd_err(connection, "PingAck did not arrive in time.\n"); 6080b411b363SPhilipp Reisner goto reconnect; 6081b411b363SPhilipp Reisner } 6082bde89a9eSAndreas Gruenbacher set_bit(SEND_PING, &connection->flags); 6083b411b363SPhilipp Reisner continue; 6084b411b363SPhilipp Reisner } else if (rv == -EINTR) { 6085668700b4SPhilipp Reisner /* maybe drbd_thread_stop(): the while condition will notice. 6086668700b4SPhilipp Reisner * maybe woken for send_ping: we'll send a ping above, 6087668700b4SPhilipp Reisner * and change the rcvtimeo */ 6088668700b4SPhilipp Reisner flush_signals(current); 6089b411b363SPhilipp Reisner continue; 6090b411b363SPhilipp Reisner } else { 60911ec861ebSAndreas Gruenbacher drbd_err(connection, "sock_recvmsg returned %d\n", rv); 6092b411b363SPhilipp Reisner goto reconnect; 6093b411b363SPhilipp Reisner } 6094b411b363SPhilipp Reisner 6095b411b363SPhilipp Reisner if (received == expect && cmd == NULL) { 6096bde89a9eSAndreas Gruenbacher if (decode_header(connection, connection->meta.rbuf, &pi)) 6097b411b363SPhilipp Reisner goto reconnect; 6098668700b4SPhilipp Reisner cmd = &ack_receiver_tbl[pi.cmd]; 6099668700b4SPhilipp Reisner if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) { 61001ec861ebSAndreas Gruenbacher drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", 61012fcb8f30SAndreas Gruenbacher cmdname(pi.cmd), pi.cmd); 6102b411b363SPhilipp Reisner goto disconnect; 6103b411b363SPhilipp Reisner } 6104e658983aSAndreas Gruenbacher expect = header_size + cmd->pkt_size; 610552b061a4SAndreas Gruenbacher if (pi.size != expect - header_size) { 61061ec861ebSAndreas Gruenbacher drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n", 610777351055SPhilipp Reisner pi.cmd, pi.size); 6108b411b363SPhilipp Reisner goto reconnect; 
6109b411b363SPhilipp Reisner } 6110257d0af6SPhilipp Reisner } 6111b411b363SPhilipp Reisner if (received == expect) { 61122735a594SAndreas Gruenbacher bool err; 6113a4fbda8eSPhilipp Reisner 6114bde89a9eSAndreas Gruenbacher err = cmd->fn(connection, &pi); 61152735a594SAndreas Gruenbacher if (err) { 6116d75f773cSSakari Ailus drbd_err(connection, "%ps failed\n", cmd->fn); 6117b411b363SPhilipp Reisner goto reconnect; 61181952e916SAndreas Gruenbacher } 6119b411b363SPhilipp Reisner 6120bde89a9eSAndreas Gruenbacher connection->last_received = jiffies; 6121f36af18cSLars Ellenberg 6122668700b4SPhilipp Reisner if (cmd == &ack_receiver_tbl[P_PING_ACK]) { 6123668700b4SPhilipp Reisner set_idle_timeout(connection); 612444ed167dSPhilipp Reisner ping_timeout_active = false; 612544ed167dSPhilipp Reisner } 6126b411b363SPhilipp Reisner 6127bde89a9eSAndreas Gruenbacher buf = connection->meta.rbuf; 6128b411b363SPhilipp Reisner received = 0; 612952b061a4SAndreas Gruenbacher expect = header_size; 6130b411b363SPhilipp Reisner cmd = NULL; 6131b411b363SPhilipp Reisner } 6132b411b363SPhilipp Reisner } 6133b411b363SPhilipp Reisner 6134b411b363SPhilipp Reisner if (0) { 6135b411b363SPhilipp Reisner reconnect: 6136bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 6137bde89a9eSAndreas Gruenbacher conn_md_sync(connection); 6138b411b363SPhilipp Reisner } 6139b411b363SPhilipp Reisner if (0) { 6140b411b363SPhilipp Reisner disconnect: 6141bde89a9eSAndreas Gruenbacher conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); 6142b411b363SPhilipp Reisner } 6143b411b363SPhilipp Reisner 6144668700b4SPhilipp Reisner drbd_info(connection, "ack_receiver terminated\n"); 6145b411b363SPhilipp Reisner 6146b411b363SPhilipp Reisner return 0; 6147b411b363SPhilipp Reisner } 6148668700b4SPhilipp Reisner 6149668700b4SPhilipp Reisner void drbd_send_acks_wf(struct work_struct *ws) 6150668700b4SPhilipp Reisner { 6151668700b4SPhilipp Reisner struct drbd_peer_device 
*peer_device = 6152668700b4SPhilipp Reisner container_of(ws, struct drbd_peer_device, send_acks_work); 6153668700b4SPhilipp Reisner struct drbd_connection *connection = peer_device->connection; 6154668700b4SPhilipp Reisner struct drbd_device *device = peer_device->device; 6155668700b4SPhilipp Reisner struct net_conf *nc; 6156668700b4SPhilipp Reisner int tcp_cork, err; 6157668700b4SPhilipp Reisner 6158668700b4SPhilipp Reisner rcu_read_lock(); 6159668700b4SPhilipp Reisner nc = rcu_dereference(connection->net_conf); 6160668700b4SPhilipp Reisner tcp_cork = nc->tcp_cork; 6161668700b4SPhilipp Reisner rcu_read_unlock(); 6162668700b4SPhilipp Reisner 6163668700b4SPhilipp Reisner if (tcp_cork) 6164668700b4SPhilipp Reisner drbd_tcp_cork(connection->meta.socket); 6165668700b4SPhilipp Reisner 6166668700b4SPhilipp Reisner err = drbd_finish_peer_reqs(device); 6167668700b4SPhilipp Reisner kref_put(&device->kref, drbd_destroy_device); 6168668700b4SPhilipp Reisner /* get is in drbd_endio_write_sec_final(). That is necessary to keep the 6169668700b4SPhilipp Reisner struct work_struct send_acks_work alive, which is in the peer_device object */ 6170668700b4SPhilipp Reisner 6171668700b4SPhilipp Reisner if (err) { 6172668700b4SPhilipp Reisner conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); 6173668700b4SPhilipp Reisner return; 6174668700b4SPhilipp Reisner } 6175668700b4SPhilipp Reisner 6176668700b4SPhilipp Reisner if (tcp_cork) 6177668700b4SPhilipp Reisner drbd_tcp_uncork(connection->meta.socket); 6178668700b4SPhilipp Reisner 6179668700b4SPhilipp Reisner return; 6180668700b4SPhilipp Reisner } 6181