1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev 11b411b363SPhilipp Reisner from Logicworks, Inc. for making SDP replication support possible. 12b411b363SPhilipp Reisner 13b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 14b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 15b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 16b411b363SPhilipp Reisner any later version. 17b411b363SPhilipp Reisner 18b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 19b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 20b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21b411b363SPhilipp Reisner GNU General Public License for more details. 22b411b363SPhilipp Reisner 23b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 24b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 25b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

/* serializes open/release of the drbd block devices */
static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_md_sync(struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
int proc_details;       /* Detail level in proc drbd*/

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct idr minors;			/* maps minor number -> struct drbd_conf */
struct list_head drbd_tconns;		/* list of struct drbd_tconn */

/* slab caches and mempools shared by all devices */
struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* peer requests */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
   member of struct page.
138b411b363SPhilipp Reisner */ 139b411b363SPhilipp Reisner struct page *drbd_pp_pool; 140b411b363SPhilipp Reisner spinlock_t drbd_pp_lock; 141b411b363SPhilipp Reisner int drbd_pp_vacant; 142b411b363SPhilipp Reisner wait_queue_head_t drbd_pp_wait; 143b411b363SPhilipp Reisner 144b411b363SPhilipp Reisner DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); 145b411b363SPhilipp Reisner 1467d4e9d09SEmese Revfy static const struct block_device_operations drbd_ops = { 147b411b363SPhilipp Reisner .owner = THIS_MODULE, 148b411b363SPhilipp Reisner .open = drbd_open, 149b411b363SPhilipp Reisner .release = drbd_release, 150b411b363SPhilipp Reisner }; 151b411b363SPhilipp Reisner 152da4a75d2SLars Ellenberg static void bio_destructor_drbd(struct bio *bio) 153da4a75d2SLars Ellenberg { 154da4a75d2SLars Ellenberg bio_free(bio, drbd_md_io_bio_set); 155da4a75d2SLars Ellenberg } 156da4a75d2SLars Ellenberg 157da4a75d2SLars Ellenberg struct bio *bio_alloc_drbd(gfp_t gfp_mask) 158da4a75d2SLars Ellenberg { 159da4a75d2SLars Ellenberg struct bio *bio; 160da4a75d2SLars Ellenberg 161da4a75d2SLars Ellenberg if (!drbd_md_io_bio_set) 162da4a75d2SLars Ellenberg return bio_alloc(gfp_mask, 1); 163da4a75d2SLars Ellenberg 164da4a75d2SLars Ellenberg bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); 165da4a75d2SLars Ellenberg if (!bio) 166da4a75d2SLars Ellenberg return NULL; 167da4a75d2SLars Ellenberg bio->bi_destructor = bio_destructor_drbd; 168da4a75d2SLars Ellenberg return bio; 169da4a75d2SLars Ellenberg } 170da4a75d2SLars Ellenberg 171b411b363SPhilipp Reisner #ifdef __CHECKER__ 172b411b363SPhilipp Reisner /* When checking with sparse, and this is an inline function, sparse will 173b411b363SPhilipp Reisner give tons of false positives. When this is a real functions sparse works. 
174b411b363SPhilipp Reisner */ 175b411b363SPhilipp Reisner int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) 176b411b363SPhilipp Reisner { 177b411b363SPhilipp Reisner int io_allowed; 178b411b363SPhilipp Reisner 179b411b363SPhilipp Reisner atomic_inc(&mdev->local_cnt); 180b411b363SPhilipp Reisner io_allowed = (mdev->state.disk >= mins); 181b411b363SPhilipp Reisner if (!io_allowed) { 182b411b363SPhilipp Reisner if (atomic_dec_and_test(&mdev->local_cnt)) 183b411b363SPhilipp Reisner wake_up(&mdev->misc_wait); 184b411b363SPhilipp Reisner } 185b411b363SPhilipp Reisner return io_allowed; 186b411b363SPhilipp Reisner } 187b411b363SPhilipp Reisner 188b411b363SPhilipp Reisner #endif 189b411b363SPhilipp Reisner 190b411b363SPhilipp Reisner /** 191b411b363SPhilipp Reisner * DOC: The transfer log 192b411b363SPhilipp Reisner * 193b411b363SPhilipp Reisner * The transfer log is a single linked list of &struct drbd_tl_epoch objects. 19487eeee41SPhilipp Reisner * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail 195b411b363SPhilipp Reisner * of the list. There is always at least one &struct drbd_tl_epoch object. 196b411b363SPhilipp Reisner * 197b411b363SPhilipp Reisner * Each &struct drbd_tl_epoch has a circular double linked list of requests 198b411b363SPhilipp Reisner * attached. 
199b411b363SPhilipp Reisner */ 2002f5cdd0bSPhilipp Reisner static int tl_init(struct drbd_tconn *tconn) 201b411b363SPhilipp Reisner { 202b411b363SPhilipp Reisner struct drbd_tl_epoch *b; 203b411b363SPhilipp Reisner 204b411b363SPhilipp Reisner /* during device minor initialization, we may well use GFP_KERNEL */ 205b411b363SPhilipp Reisner b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); 206b411b363SPhilipp Reisner if (!b) 207b411b363SPhilipp Reisner return 0; 208b411b363SPhilipp Reisner INIT_LIST_HEAD(&b->requests); 209b411b363SPhilipp Reisner INIT_LIST_HEAD(&b->w.list); 210b411b363SPhilipp Reisner b->next = NULL; 211b411b363SPhilipp Reisner b->br_number = 4711; 2127e602c0aSPhilipp Reisner b->n_writes = 0; 213b411b363SPhilipp Reisner b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ 214b411b363SPhilipp Reisner 2152f5cdd0bSPhilipp Reisner tconn->oldest_tle = b; 2162f5cdd0bSPhilipp Reisner tconn->newest_tle = b; 2172f5cdd0bSPhilipp Reisner INIT_LIST_HEAD(&tconn->out_of_sequence_requests); 218cdfda633SPhilipp Reisner INIT_LIST_HEAD(&tconn->barrier_acked_requests); 219b411b363SPhilipp Reisner 220b411b363SPhilipp Reisner return 1; 221b411b363SPhilipp Reisner } 222b411b363SPhilipp Reisner 2232f5cdd0bSPhilipp Reisner static void tl_cleanup(struct drbd_tconn *tconn) 224b411b363SPhilipp Reisner { 2252f5cdd0bSPhilipp Reisner if (tconn->oldest_tle != tconn->newest_tle) 2262f5cdd0bSPhilipp Reisner conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n"); 2272f5cdd0bSPhilipp Reisner if (!list_empty(&tconn->out_of_sequence_requests)) 2282f5cdd0bSPhilipp Reisner conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n"); 2292f5cdd0bSPhilipp Reisner kfree(tconn->oldest_tle); 2302f5cdd0bSPhilipp Reisner tconn->oldest_tle = NULL; 2312f5cdd0bSPhilipp Reisner kfree(tconn->unused_spare_tle); 2322f5cdd0bSPhilipp Reisner tconn->unused_spare_tle = NULL; 233d628769bSAndreas Gruenbacher } 234d628769bSAndreas Gruenbacher 
/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @tconn:	DRBD connection.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	/* (re)initialize the epoch object before linking it in */
	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = tconn->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (tconn->newest_tle != new) {
		tconn->newest_tle->next = new;
		tconn->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @tconn:	DRBD connection.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch objects this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_conf *mdev;
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&tconn->req_lock);

	b = tconn->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			 barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
			 barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			 barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, BARRIER_ACKED);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the lists head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mode(CONNECTION_LOST_WHILE_PENDING).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, BARRIER_ACKED) above.
	   */
	list_splice_init(&b->requests, &tconn->barrier_acked_requests);
	mdev = b->w.mdev;

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		/* a new barrier is pending: recycle this epoch as the new head */
		_tl_add_barrier(tconn, b);
		if (nob)
			tconn->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore tconn->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		tconn->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&tconn->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	/* protocol violation by the peer: tear the connection down */
	spin_unlock_irq(&tconn->req_lock);
	conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @tconn:	DRBD connection.
 * @what:       The action/event to perform with all request objects
 *
 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
 * RESTART_FROZEN_DISK_IO.
 */
void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	/* walk the epoch list from oldest to newest; pn always points to
	 * the link that refers to the current epoch b */
	b = tconn->oldest_tle;
	pn = &tconn->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

			if (rv & MR_WRITE)
				n_writes++;
			if (rv & MR_READ)
				n_reads++;
		}
		tmp = b->next;

		if (n_writes) {
			/* epoch still has writes: keep it, possibly re-queue
			 * its barrier for resending */
			if (what == RESEND) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(b->w.mdev);
					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
				}

				drbd_queue_work(&tconn->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			/* no writes left: surviving reads are carried over
			 * into the next epoch, this one is dropped/recycled */
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(b->w.mdev);

			if (b == tconn->newest_tle) {
				/* recycle, but reinit! */
				if (tmp != NULL)
					conn_err(tconn, "ASSERT FAILED tmp == NULL");
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}

	/* Actions operating on the disk state, also want to work on
	   requests that got barrier acked. */
	switch (what) {
	case FAIL_FROZEN_DISK_IO:
	case RESTART_FROZEN_DISK_IO:
		list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			_req_mod(req, what);
		}
		/* fall through -- nothing more to do for these events */
	case CONNECTION_LOST_WHILE_PENDING:
	case RESEND:
		break;
	default:
		conn_err(tconn, "what = %d in _tl_restart()\n", what);
	}
}

/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @tconn:	DRBD connection.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	struct list_head *le, *tle;
	struct drbd_request *r;
	int vnr;

	spin_lock_irq(&tconn->req_lock);

	_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);

	/* we expect this list to be empty. */
	if (!list_empty(&tconn->out_of_sequence_requests))
		conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, CONNECTION_LOST_WHILE_PENDING);
	}

	/* ensure bit indicating barrier is required is clear */
	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr)
		clear_bit(CREATE_BARRIER, &mdev->flags);
	rcu_read_unlock();

	spin_unlock_irq(&tconn->req_lock);
}

/* Locked wrapper around _tl_restart(). */
void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
{
	spin_lock_irq(&tconn->req_lock);
	_tl_restart(tconn, what);
	spin_unlock_irq(&tconn->req_lock);
}

/**
 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
 * @mdev:	DRBD device.
 */
void tl_abort_disk_io(struct drbd_conf *mdev)
{
	struct drbd_tconn *tconn = mdev->tconn;
	struct drbd_tl_epoch *b;
	struct list_head *le, *tle;
	struct drbd_request *req;

	spin_lock_irq(&tconn->req_lock);
	/* walk every epoch still in the transfer log */
	b = tconn->oldest_tle;
	while (b) {
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			/* only requests with local disk I/O still outstanding */
			if (!(req->rq_state & RQ_LOCAL_PENDING))
				continue;
			if (req->w.mdev == mdev)
				_req_mod(req, ABORT_DISK_IO);
		}
		b = b->next;
	}

	/* and the requests whose barrier already got acked */
	list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
		req = list_entry(le, struct drbd_request, tl_requests);
		if (!(req->rq_state & RQ_LOCAL_PENDING))
			continue;
		if (req->w.mdev == mdev)
			_req_mod(req, ABORT_DISK_IO);
	}

	spin_unlock_irq(&tconn->req_lock);
}

/* Entry point for all DRBD kernel threads (receiver, worker, asender).
 * Runs thi->function, handling the RESTARTING handshake with
 * drbd_thread_start(), and drops the module/tconn references taken at
 * start time when the thread finally terminates. */
static int drbd_thread_setup(void *arg)
{
	struct drbd_thread *thi = (struct drbd_thread *) arg;
	struct drbd_tconn *tconn = thi->tconn;
	unsigned long flags;
	int retval;

	snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s",
		 thi->name[0], thi->tconn->name);

restart:
	retval = thi->function(thi);

	spin_lock_irqsave(&thi->t_lock, flags);

	/* if the receiver has been "EXITING", the last thing it did
	 * was set the conn state to "StandAlone",
	 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
	 * and receiver thread will be "started".
	 * drbd_thread_start needs to set "RESTARTING" in that case.
	 * t_state check and assignment needs to be within the same spinlock,
	 * so either thread_start sees EXITING, and can remap to RESTARTING,
	 * or thread_start see NONE, and can proceed as normal.
	 */

	if (thi->t_state == RESTARTING) {
		conn_info(tconn, "Restarting %s thread\n", thi->name);
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		goto restart;
	}

	thi->task = NULL;
	thi->t_state = NONE;
	smp_mb();
	complete_all(&thi->stop);
	spin_unlock_irqrestore(&thi->t_lock, flags);

	conn_info(tconn, "Terminating %s\n", current->comm);

	/* Release mod reference taken when thread was started */

	kref_put(&tconn->kref, &conn_destroy);
	module_put(THIS_MODULE);
	return retval;
}

/* Initialize a struct drbd_thread; does not start the thread. */
static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi,
			     int (*func) (struct drbd_thread *), char *name)
{
	spin_lock_init(&thi->t_lock);
	thi->task    = NULL;
	thi->t_state = NONE;
	thi->function = func;
	thi->tconn = tconn;
	/* NOTE(review): strncpy does not guarantee NUL termination when
	 * strlen(name) >= ARRAY_SIZE(thi->name) -- presumably all callers
	 * pass short literal names; confirm before relying on thi->name
	 * being a proper C string. */
	strncpy(thi->name, name, ARRAY_SIZE(thi->name));
}

/* Start (or restart) a DRBD thread.  Takes a module reference and a
 * tconn reference that are released by drbd_thread_setup() on exit.
 * Returns true on success (or if the thread is already running),
 * false if the thread could not be created. */
int drbd_thread_start(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	struct task_struct *nt;
	unsigned long flags;

	/* is used from state engine doing drbd_thread_stop_nowait,
	 * while holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	switch (thi->t_state) {
	case NONE:
		conn_info(tconn, "Starting %s thread (from %s [%d])\n",
			 thi->name, current->comm, current->pid);

		/* Get ref on module for thread - this is released when thread exits */
		if (!try_module_get(THIS_MODULE)) {
			conn_err(tconn, "Failed to get module reference in drbd_thread_start\n");
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return false;
		}

		kref_get(&thi->tconn->kref);

		init_completion(&thi->stop);
		thi->reset_cpu_mask = 1;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		flush_signals(current); /* otherw. may get -ERESTARTNOINTR */

		nt = kthread_create(drbd_thread_setup, (void *) thi,
				    "drbd_%c_%s", thi->name[0], thi->tconn->name);

		if (IS_ERR(nt)) {
			conn_err(tconn, "Couldn't start thread\n");

			/* undo the references taken above */
			kref_put(&tconn->kref, &conn_destroy);
			module_put(THIS_MODULE);
			return false;
		}
		spin_lock_irqsave(&thi->t_lock, flags);
		thi->task = nt;
		thi->t_state = RUNNING;
		spin_unlock_irqrestore(&thi->t_lock, flags);
		wake_up_process(nt);
		break;
	case EXITING:
		/* thread is on its way out: ask it to loop once more
		 * instead of creating a new task */
		thi->t_state = RESTARTING;
		conn_info(tconn, "Restarting %s thread (from %s [%d])\n",
				thi->name, current->comm, current->pid);
		/* fall through */
	case RUNNING:
	case RESTARTING:
	default:
		spin_unlock_irqrestore(&thi->t_lock, flags);
		break;
	}

	return true;
}


void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
	unsigned long flags;

	enum drbd_thread_state ns = restart ?
RESTARTING : EXITING;

	/* may be called from state engine, holding the req lock irqsave */
	spin_lock_irqsave(&thi->t_lock, flags);

	if (thi->t_state == NONE) {
		spin_unlock_irqrestore(&thi->t_lock, flags);
		/* not running: "restart" degenerates to a plain start */
		if (restart)
			drbd_thread_start(thi);
		return;
	}

	if (thi->t_state != ns) {
		if (thi->task == NULL) {
			spin_unlock_irqrestore(&thi->t_lock, flags);
			return;
		}

		thi->t_state = ns;
		smp_mb();
		init_completion(&thi->stop);
		/* kick the thread out of blocking calls; never signal
		 * ourselves */
		if (thi->task != current)
			force_sig(DRBD_SIGKILL, thi->task);
	}

	spin_unlock_irqrestore(&thi->t_lock, flags);

	if (wait)
		wait_for_completion(&thi->stop);
}

/* Map @task back to the connection thread (receiver, asender or worker)
 * it runs, or NULL if it is none of them. */
static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi =
		task == tconn->receiver.task ? &tconn->receiver :
		task == tconn->asender.task  ? &tconn->asender :
		task == tconn->worker.task   ? &tconn->worker : NULL;

	return thi;
}

/* Human-readable name of the drbd thread @task runs; falls back to the
 * task's comm for tasks that are not drbd threads of this connection. */
char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task)
{
	struct drbd_thread *thi = drbd_task_to_thread(tconn, task);
	return thi ? thi->name : task->comm;
}

/* Lowest minor number of any volume of @tconn, or -1 if it has no volumes. */
int conn_lowest_minor(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int vnr = 0, m;

	rcu_read_lock();
	/* idr_get_next() with vnr == 0 yields the volume with the
	 * smallest volume number */
	mdev = idr_get_next(&tconn->volumes, &vnr);
	m = mdev ? mdev_to_minor(mdev) : -1;
	rcu_read_unlock();

	return m;
}

#ifdef CONFIG_SMP
/**
 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
 * @mdev:	DRBD device.
 *
 * Forces all threads of a device onto the same CPU. This is beneficial for
 * DRBD's performance. May be overwritten by user's configuration.
 */
void drbd_calc_cpu_mask(struct drbd_tconn *tconn)
{
	int ord, cpu;

	/* user override.
 */
	if (cpumask_weight(tconn->cpu_mask))
		return;

	/* spread connections round-robin over the online CPUs,
	 * keyed by their lowest minor number */
	ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, tconn->cpu_mask);
			return;
		}
	}
	/* should not be reached */
	cpumask_setall(tconn->cpu_mask);
}

/**
 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
 * @mdev:	DRBD device.
 * @thi:	drbd_thread object
 *
 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
 * prematurely.
 */
void drbd_thread_current_set_cpu(struct drbd_thread *thi)
{
	struct task_struct *p = current;

	/* only act once after drbd_thread_start() flagged us */
	if (!thi->reset_cpu_mask)
		return;
	thi->reset_cpu_mask = 0;
	set_cpus_allowed_ptr(p, thi->tconn->cpu_mask);
}
#endif

/**
 * drbd_header_size  -  size of a packet header
 *
 * The header size is a multiple of 8, so any payload following the header is
 * word aligned on 64-bit architectures.  (The bitmap send and receive code
 * relies on this.)
 */
unsigned int drbd_header_size(struct drbd_tconn *tconn)
{
	if (tconn->agreed_pro_version >= 100) {
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
		return sizeof(struct p_header100);
	} else {
		/* headers 80 and 95 are interchangeable in size */
		BUILD_BUG_ON(sizeof(struct p_header80) !=
			     sizeof(struct p_header95));
		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
		return sizeof(struct p_header80);
	}
}

/* Fill in a protocol-80 header (16 bit length); returns its on-wire size. */
static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
{
	h->magic = cpu_to_be32(DRBD_MAGIC);
	h->command = cpu_to_be16(cmd);
	h->length = cpu_to_be16(size);
	return sizeof(struct p_header80);
}

/* Fill in a protocol-95 header (32 bit length); returns its on-wire size. */
static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size)
{
	h->magic = cpu_to_be16(DRBD_MAGIC_BIG);
	h->command = cpu_to_be16(cmd);
	h->length = cpu_to_be32(size);
	return sizeof(struct p_header95);
}

/* Fill in a protocol-100 header, which additionally carries the volume
 * number; returns its on-wire size. */
static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
				      int size, int vnr)
{
	h->magic = cpu_to_be32(DRBD_MAGIC_100);
	h->volume = cpu_to_be16(vnr);
	h->command = cpu_to_be16(cmd);
	h->length = cpu_to_be32(size);
	h->pad = 0;
	return sizeof(struct p_header100);
}

/* Pick the newest header format both sides agreed on and fill it in. */
static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
				   void *buffer, enum drbd_packet cmd, int size)
{
	if (tconn->agreed_pro_version >= 100)
		return prepare_header100(buffer, cmd, size, vnr);
	else if (tconn->agreed_pro_version >= 95 &&
		 size > DRBD_MAX_SIZE_H80_PACKET)
		/* header 95 is only needed when the length does not fit
		 * the 16 bit length field of header 80 */
		return prepare_header95(buffer, cmd, size);
	else
		return prepare_header80(buffer, cmd, size);
}

/* Caller must already hold sock->mutex.  Returns a pointer to the
 * payload area of the send buffer (right after the header), or NULL
 * if the socket is gone. */
static void *__conn_prepare_command(struct drbd_tconn *tconn,
				    struct drbd_socket *sock)
{
	if (!sock->socket)
		return NULL;
	return sock->sbuf + drbd_header_size(tconn);
}

/* Lock @sock and reserve its send buffer.  On success, sock->mutex
 * stays held and is released by the matching conn_send_command();
 * on failure (NULL return) the mutex is dropped here. */
void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock)
{
	void *p;

	mutex_lock(&sock->mutex);
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		mutex_unlock(&sock->mutex);

	return p;
}

/* Per-volume wrapper around conn_prepare_command(). */
void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock)
{
	return conn_prepare_command(mdev->tconn, sock);
}

/* Prepend a header to sock->sbuf and send it, followed by the optional
 * payload @data.  Returns 0 on success or a negative error. */
static int __send_command(struct drbd_tconn *tconn, int vnr,
			  struct drbd_socket *sock, enum drbd_packet cmd,
			  unsigned int header_size, void *data,
			  unsigned int size)
{
	int msg_flags;
	int err;

	/*
	 * Called with @data == NULL and the size of the data blocks in @size
	 * for commands that send data blocks.  For those commands, omit the
	 * MSG_MORE flag: this will increase the likelihood that data blocks
	 * which are page aligned on the sender will end up page aligned on the
	 * receiver.
	 */
	msg_flags = data ? MSG_MORE : 0;

	header_size += prepare_header(tconn, vnr, sock->sbuf, cmd,
				      header_size + size);
	err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size,
			    msg_flags);
	if (data && !err)
		err = drbd_send_all(tconn, sock->socket, data, size, 0);
	return err;
}

/* Connection-wide command: volume number 0. */
static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
			       enum drbd_packet cmd, unsigned int header_size,
			       void *data, unsigned int size)
{
	return __send_command(tconn, 0, sock, cmd, header_size, data, size);
}

/* Counterpart of conn_prepare_command(): sends, then drops sock->mutex. */
int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __conn_send_command(tconn, sock, cmd, header_size, data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

/* Counterpart of drbd_prepare_command(): sends for one volume, then
 * drops sock->mutex. */
int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock,
		      enum drbd_packet cmd, unsigned int header_size,
		      void *data, unsigned int size)
{
	int err;

	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size,
			     data, size);
	mutex_unlock(&sock->mutex);
	return err;
}

/* Send a P_PING on the meta socket. */
int drbd_send_ping(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING, 0, NULL, 0);
}

/* Answer a P_PING with a P_PING_ACK on the meta socket. */
int drbd_send_ping_ack(struct drbd_tconn *tconn)
{
	struct drbd_socket *sock;

	sock = &tconn->meta;
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0);
}

/* Send the resync parameters (rate, dynamic-rate controller knobs,
 * verify/csums algorithm names) to the peer.  The on-wire size and
 * packet type depend on the agreed protocol version. */
int drbd_send_sync_param(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_param_95 *p;
	int size;
	const int apv = mdev->tconn->agreed_pro_version;
	enum drbd_packet cmd;
	struct net_conf *nc;
	struct disk_conf *dc;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);

	/* apv 88 appends only the NUL-terminated verify_alg string;
	 * 89..94 and >= 95 use fixed-size extended structs */
	size = apv <= 87 ? sizeof(struct p_rs_param)
		: apv == 88 ? sizeof(struct p_rs_param)
			+ strlen(nc->verify_alg) + 1
		: apv <= 94 ? sizeof(struct p_rs_param_89)
		: /* apv >= 95 */ sizeof(struct p_rs_param_95);

	cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (get_ldev(mdev)) {
		dc = rcu_dereference(mdev->ldev->disk_conf);
		p->resync_rate = cpu_to_be32(dc->resync_rate);
		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
		put_ldev(mdev);
	} else {
		/* diskless: fall back to compiled-in defaults */
		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
		p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
	}

	if (apv >= 88)
		strcpy(p->verify_alg, nc->verify_alg);
	if (apv >= 89)
		strcpy(p->csums_alg, nc->csums_alg);
	rcu_read_unlock();

	return drbd_send_command(mdev, sock, cmd, size, NULL, 0);
}

/* Send our protocol configuration as packet @cmd.  Caller must hold
 * sock->mutex (see drbd_send_protocol()); it is dropped here only on
 * the -EOPNOTSUPP error path. */
int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	struct drbd_socket *sock;
	struct p_protocol *p;
	struct net_conf *nc;
	int size, cf;

	sock = &tconn->data;
	p = __conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	if (nc->tentative && tconn->agreed_pro_version < 92) {
		rcu_read_unlock();
		mutex_unlock(&sock->mutex);
		conn_err(tconn, "--dry-run is not supported by peer");
		return -EOPNOTSUPP;
	}

	size = sizeof(*p);
	if (tconn->agreed_pro_version >= 87)
		size += strlen(nc->integrity_alg) + 1;

	p->protocol = cpu_to_be32(nc->wire_protocol);
	p->after_sb_0p = cpu_to_be32(nc->after_sb_0p);
	p->after_sb_1p = cpu_to_be32(nc->after_sb_1p);
	p->after_sb_2p = cpu_to_be32(nc->after_sb_2p);
	p->two_primaries = cpu_to_be32(nc->two_primaries);
	cf = 0;
	if (nc->discard_my_data)
		cf |= CF_DISCARD_MY_DATA;
	if (nc->tentative)
		cf |= CF_DRY_RUN;
	p->conn_flags = cpu_to_be32(cf);

	if (tconn->agreed_pro_version >= 87)
		strcpy(p->integrity_alg, nc->integrity_alg);
	rcu_read_unlock();

	return __conn_send_command(tconn, sock, cmd, size, NULL, 0);
}

/* Locked wrapper: send our protocol configuration as P_PROTOCOL. */
int drbd_send_protocol(struct drbd_tconn *tconn)
{
	int err;

	mutex_lock(&tconn->data.mutex);
	err = __drbd_send_protocol(tconn, P_PROTOCOL);
	mutex_unlock(&tconn->data.mutex);

	return err;
}

/* Send our generation UUIDs plus the bitmap weight and @uuid_flags.
 * Returns 0 (without sending) when the disk is below D_NEGOTIATING. */
int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
{
	struct drbd_socket *sock;
	struct p_uuids *p;
	int i;

	if (!get_ldev_if_state(mdev, D_NEGOTIATING))
		return 0;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p) {
		put_ldev(mdev);
		return -EIO;
	}
	for (i = UI_CURRENT; i < UI_SIZE; i++)
		p->uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;

	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
	rcu_read_lock();
	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
	rcu_read_unlock();
	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
	p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);

	put_ldev(mdev);
	return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0);
}

int drbd_send_uuids(struct drbd_conf *mdev)
{
	return _drbd_send_uuids(mdev, 0);
}

int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
{
	/* flag value 8 requests the peer to skip the initial sync;
	 * see the uuid_flags encoding in _drbd_send_uuids() */
	return _drbd_send_uuids(mdev, 8);
}

/* Log the four generation UUIDs (or just the effective data uuid when
 * we have no attached disk), prefixed with @text. */
void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
{
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		u64 *uuid = mdev->ldev->md.uuid;
		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
			 text,
			 (unsigned long long)uuid[UI_CURRENT],
			 (unsigned long long)uuid[UI_BITMAP],
			 (unsigned long long)uuid[UI_HISTORY_START],
			 (unsigned long long)uuid[UI_HISTORY_END]);
		put_ldev(mdev);
	} else {
		dev_info(DEV, "%s effective data uuid: %016llX\n",
			 text,
			 (unsigned long long)mdev->ed_uuid);
	}
}

/* Generate a new bitmap UUID, persist it, and send it to the peer. */
void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_rs_uuid *p;
	u64 uuid;

	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

	uuid = mdev->ldev->md.uuid[UI_BITMAP];
	if (uuid && uuid != UUID_JUST_CREATED)
		uuid = uuid + UUID_NEW_BM_OFFSET;
	else
		get_random_bytes(&uuid, sizeof(u64));
	drbd_uuid_set(mdev, UI_BITMAP, uuid);
	drbd_print_uuids(mdev, "updated sync UUID");
	drbd_md_sync(mdev);

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->uuid = cpu_to_be64(uuid);
		drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0);
	}
}

/* Send our current sizes (backing device, user-configured, effective
 * capacity, max bio size, queue order type) to the peer. */
int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
{
	struct drbd_socket *sock;
	struct p_sizes *p;
	sector_t d_size, u_size;
	int q_order_type, max_bio_size;

	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		D_ASSERT(mdev->ldev->backing_bdev);
		d_size = drbd_get_max_capacity(mdev->ldev);
		rcu_read_lock();
		u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
		rcu_read_unlock();
		q_order_type = drbd_queue_order_type(mdev);
		max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
		put_ldev(mdev);
	} else {
		/* diskless */
		d_size = 0;
		u_size = 0;
		q_order_type = QUEUE_ORDERED_NONE;
		max_bio_size = DRBD_MAX_BIO_SIZE; /* ...
multiple BIOs per peer_request */
	}

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;

	/* clamp to what the peer's protocol version can express */
	if (mdev->tconn->agreed_pro_version <= 94)
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
	else if (mdev->tconn->agreed_pro_version < 100)
		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95);

	p->d_size = cpu_to_be64(d_size);
	p->u_size = cpu_to_be64(u_size);
	p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
	p->max_bio_size = cpu_to_be32(max_bio_size);
	p->queue_order_type = cpu_to_be16(q_order_type);
	p->dds_flags = cpu_to_be16(flags);
	return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0);
}

/**
 * drbd_send_current_state() - Sends the drbd state to the peer
 * @mdev:	DRBD device.
 */
int drbd_send_current_state(struct drbd_conf *mdev)
{
	struct drbd_socket *sock;
	struct p_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
}

/**
 * drbd_send_state() - After a state change, sends the new state to the peer
 * @mdev:	DRBD device.
 * @state:	the state to send, not necessarily the current state.
 *
 * Each state change queues an "after_state_ch" work, which will eventually
 * send the resulting new state to the peer. If more state changes happen
 * between queuing and processing of the after_state_ch work, we still
 * want to send each intermediary state in the order it occurred.
 */
int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
{
	struct drbd_socket *sock;
	struct p_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->state = cpu_to_be32(state.i); /* Within the send mutex */
	return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0);
}

/* Request the peer to apply a state change described by @mask/@val. */
int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val)
{
	struct drbd_socket *sock;
	struct p_req_state *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0);
}

/* Connection-wide state change request; peers before protocol 100 only
 * understand the per-volume P_STATE_CHG_REQ packet. */
int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
{
	enum drbd_packet cmd;
	struct drbd_socket *sock;
	struct p_req_state *p;

	cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ;
	sock = &tconn->data;
	p = conn_prepare_command(tconn, sock);
	if (!p)
		return -EIO;
	p->mask = cpu_to_be32(mask.i);
	p->val = cpu_to_be32(val.i);
	return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0);
}

/* Reply to a state change request with @retcode on the meta socket;
 * send errors are silently ignored (best effort). */
void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;

	sock = &mdev->tconn->meta;
	p = drbd_prepare_command(mdev, sock);
	if (p) {
		p->retcode = cpu_to_be32(retcode);
		drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0);
	}
}

/* Connection-wide counterpart of drbd_send_sr_reply(). */
void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode)
{
	struct drbd_socket *sock;
	struct p_req_state_reply *p;
	enum drbd_packet cmd = tconn->agreed_pro_version < 100 ?
P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; 1238047cd4a6SPhilipp Reisner 12399f5bdc33SAndreas Gruenbacher sock = &tconn->meta; 12409f5bdc33SAndreas Gruenbacher p = conn_prepare_command(tconn, sock); 12419f5bdc33SAndreas Gruenbacher if (p) { 12429f5bdc33SAndreas Gruenbacher p->retcode = cpu_to_be32(retcode); 12439f5bdc33SAndreas Gruenbacher conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); 12449f5bdc33SAndreas Gruenbacher } 1245047cd4a6SPhilipp Reisner } 1246047cd4a6SPhilipp Reisner 1247a02d1240SAndreas Gruenbacher static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) 1248a02d1240SAndreas Gruenbacher { 1249a02d1240SAndreas Gruenbacher BUG_ON(code & ~0xf); 1250a02d1240SAndreas Gruenbacher p->encoding = (p->encoding & ~0xf) | code; 1251a02d1240SAndreas Gruenbacher } 1252a02d1240SAndreas Gruenbacher 1253a02d1240SAndreas Gruenbacher static void dcbp_set_start(struct p_compressed_bm *p, int set) 1254a02d1240SAndreas Gruenbacher { 1255a02d1240SAndreas Gruenbacher p->encoding = (p->encoding & ~0x80) | (set ? 
0x80 : 0); 1256a02d1240SAndreas Gruenbacher } 1257a02d1240SAndreas Gruenbacher 1258a02d1240SAndreas Gruenbacher static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) 1259a02d1240SAndreas Gruenbacher { 1260a02d1240SAndreas Gruenbacher BUG_ON(n & ~0x7); 1261a02d1240SAndreas Gruenbacher p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); 1262a02d1240SAndreas Gruenbacher } 1263a02d1240SAndreas Gruenbacher 1264b411b363SPhilipp Reisner int fill_bitmap_rle_bits(struct drbd_conf *mdev, 1265b411b363SPhilipp Reisner struct p_compressed_bm *p, 126650d0b1adSAndreas Gruenbacher unsigned int size, 1267b411b363SPhilipp Reisner struct bm_xfer_ctx *c) 1268b411b363SPhilipp Reisner { 1269b411b363SPhilipp Reisner struct bitstream bs; 1270b411b363SPhilipp Reisner unsigned long plain_bits; 1271b411b363SPhilipp Reisner unsigned long tmp; 1272b411b363SPhilipp Reisner unsigned long rl; 1273b411b363SPhilipp Reisner unsigned len; 1274b411b363SPhilipp Reisner unsigned toggle; 127544ed167dSPhilipp Reisner int bits, use_rle; 1276b411b363SPhilipp Reisner 1277b411b363SPhilipp Reisner /* may we use this feature? */ 127844ed167dSPhilipp Reisner rcu_read_lock(); 127944ed167dSPhilipp Reisner use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle; 128044ed167dSPhilipp Reisner rcu_read_unlock(); 128144ed167dSPhilipp Reisner if (!use_rle || mdev->tconn->agreed_pro_version < 90) 1282b411b363SPhilipp Reisner return 0; 1283b411b363SPhilipp Reisner 1284b411b363SPhilipp Reisner if (c->bit_offset >= c->bm_bits) 1285b411b363SPhilipp Reisner return 0; /* nothing to do. 
*/ 1286b411b363SPhilipp Reisner 1287b411b363SPhilipp Reisner /* use at most thus many bytes */ 128850d0b1adSAndreas Gruenbacher bitstream_init(&bs, p->code, size, 0); 128950d0b1adSAndreas Gruenbacher memset(p->code, 0, size); 1290b411b363SPhilipp Reisner /* plain bits covered in this code string */ 1291b411b363SPhilipp Reisner plain_bits = 0; 1292b411b363SPhilipp Reisner 1293b411b363SPhilipp Reisner /* p->encoding & 0x80 stores whether the first run length is set. 1294b411b363SPhilipp Reisner * bit offset is implicit. 1295b411b363SPhilipp Reisner * start with toggle == 2 to be able to tell the first iteration */ 1296b411b363SPhilipp Reisner toggle = 2; 1297b411b363SPhilipp Reisner 1298b411b363SPhilipp Reisner /* see how much plain bits we can stuff into one packet 1299b411b363SPhilipp Reisner * using RLE and VLI. */ 1300b411b363SPhilipp Reisner do { 1301b411b363SPhilipp Reisner tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) 1302b411b363SPhilipp Reisner : _drbd_bm_find_next(mdev, c->bit_offset); 1303b411b363SPhilipp Reisner if (tmp == -1UL) 1304b411b363SPhilipp Reisner tmp = c->bm_bits; 1305b411b363SPhilipp Reisner rl = tmp - c->bit_offset; 1306b411b363SPhilipp Reisner 1307b411b363SPhilipp Reisner if (toggle == 2) { /* first iteration */ 1308b411b363SPhilipp Reisner if (rl == 0) { 1309b411b363SPhilipp Reisner /* the first checked bit was set, 1310b411b363SPhilipp Reisner * store start value, */ 1311a02d1240SAndreas Gruenbacher dcbp_set_start(p, 1); 1312b411b363SPhilipp Reisner /* but skip encoding of zero run length */ 1313b411b363SPhilipp Reisner toggle = !toggle; 1314b411b363SPhilipp Reisner continue; 1315b411b363SPhilipp Reisner } 1316a02d1240SAndreas Gruenbacher dcbp_set_start(p, 0); 1317b411b363SPhilipp Reisner } 1318b411b363SPhilipp Reisner 1319b411b363SPhilipp Reisner /* paranoia: catch zero runlength. 1320b411b363SPhilipp Reisner * can only happen if bitmap is modified while we scan it. 
*/ 1321b411b363SPhilipp Reisner if (rl == 0) { 1322b411b363SPhilipp Reisner dev_err(DEV, "unexpected zero runlength while encoding bitmap " 1323b411b363SPhilipp Reisner "t:%u bo:%lu\n", toggle, c->bit_offset); 1324b411b363SPhilipp Reisner return -1; 1325b411b363SPhilipp Reisner } 1326b411b363SPhilipp Reisner 1327b411b363SPhilipp Reisner bits = vli_encode_bits(&bs, rl); 1328b411b363SPhilipp Reisner if (bits == -ENOBUFS) /* buffer full */ 1329b411b363SPhilipp Reisner break; 1330b411b363SPhilipp Reisner if (bits <= 0) { 1331b411b363SPhilipp Reisner dev_err(DEV, "error while encoding bitmap: %d\n", bits); 1332b411b363SPhilipp Reisner return 0; 1333b411b363SPhilipp Reisner } 1334b411b363SPhilipp Reisner 1335b411b363SPhilipp Reisner toggle = !toggle; 1336b411b363SPhilipp Reisner plain_bits += rl; 1337b411b363SPhilipp Reisner c->bit_offset = tmp; 1338b411b363SPhilipp Reisner } while (c->bit_offset < c->bm_bits); 1339b411b363SPhilipp Reisner 1340b411b363SPhilipp Reisner len = bs.cur.b - p->code + !!bs.cur.bit; 1341b411b363SPhilipp Reisner 1342b411b363SPhilipp Reisner if (plain_bits < (len << 3)) { 1343b411b363SPhilipp Reisner /* incompressible with this method. 1344b411b363SPhilipp Reisner * we need to rewind both word and bit position. */ 1345b411b363SPhilipp Reisner c->bit_offset -= plain_bits; 1346b411b363SPhilipp Reisner bm_xfer_ctx_bit_to_word_offset(c); 1347b411b363SPhilipp Reisner c->bit_offset = c->word_offset * BITS_PER_LONG; 1348b411b363SPhilipp Reisner return 0; 1349b411b363SPhilipp Reisner } 1350b411b363SPhilipp Reisner 1351b411b363SPhilipp Reisner /* RLE + VLI was able to compress it just fine. 1352b411b363SPhilipp Reisner * update c->word_offset. 
*/ 1353b411b363SPhilipp Reisner bm_xfer_ctx_bit_to_word_offset(c); 1354b411b363SPhilipp Reisner 1355b411b363SPhilipp Reisner /* store pad_bits */ 1356a02d1240SAndreas Gruenbacher dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); 1357b411b363SPhilipp Reisner 1358b411b363SPhilipp Reisner return len; 1359b411b363SPhilipp Reisner } 1360b411b363SPhilipp Reisner 1361f70af118SAndreas Gruenbacher /** 1362f70af118SAndreas Gruenbacher * send_bitmap_rle_or_plain 1363f70af118SAndreas Gruenbacher * 1364f70af118SAndreas Gruenbacher * Return 0 when done, 1 when another iteration is needed, and a negative error 1365f70af118SAndreas Gruenbacher * code upon failure. 1366f70af118SAndreas Gruenbacher */ 1367f70af118SAndreas Gruenbacher static int 136879ed9bd0SAndreas Gruenbacher send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) 1369b411b363SPhilipp Reisner { 13709f5bdc33SAndreas Gruenbacher struct drbd_socket *sock = &mdev->tconn->data; 137150d0b1adSAndreas Gruenbacher unsigned int header_size = drbd_header_size(mdev->tconn); 1372e658983aSAndreas Gruenbacher struct p_compressed_bm *p = sock->sbuf + header_size; 1373a982dd57SAndreas Gruenbacher int len, err; 1374b411b363SPhilipp Reisner 1375e658983aSAndreas Gruenbacher len = fill_bitmap_rle_bits(mdev, p, 1376e658983aSAndreas Gruenbacher DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c); 1377b411b363SPhilipp Reisner if (len < 0) 1378f70af118SAndreas Gruenbacher return -EIO; 1379b411b363SPhilipp Reisner 1380b411b363SPhilipp Reisner if (len) { 1381a02d1240SAndreas Gruenbacher dcbp_set_code(p, RLE_VLI_Bits); 13829f5bdc33SAndreas Gruenbacher err = __send_command(mdev->tconn, mdev->vnr, sock, 13839f5bdc33SAndreas Gruenbacher P_COMPRESSED_BITMAP, sizeof(*p) + len, 13849f5bdc33SAndreas Gruenbacher NULL, 0); 1385b411b363SPhilipp Reisner c->packets[0]++; 1386e658983aSAndreas Gruenbacher c->bytes[0] += header_size + sizeof(*p) + len; 1387b411b363SPhilipp Reisner 1388b411b363SPhilipp Reisner if (c->bit_offset >= c->bm_bits) 
1389b411b363SPhilipp Reisner len = 0; /* DONE */ 1390b411b363SPhilipp Reisner } else { 1391b411b363SPhilipp Reisner /* was not compressible. 1392b411b363SPhilipp Reisner * send a buffer full of plain text bits instead. */ 139350d0b1adSAndreas Gruenbacher unsigned int data_size; 139450d0b1adSAndreas Gruenbacher unsigned long num_words; 1395e658983aSAndreas Gruenbacher unsigned long *p = sock->sbuf + header_size; 139650d0b1adSAndreas Gruenbacher 139750d0b1adSAndreas Gruenbacher data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; 1398e658983aSAndreas Gruenbacher num_words = min_t(size_t, data_size / sizeof(*p), 139950d0b1adSAndreas Gruenbacher c->bm_words - c->word_offset); 1400e658983aSAndreas Gruenbacher len = num_words * sizeof(*p); 1401b411b363SPhilipp Reisner if (len) 1402e658983aSAndreas Gruenbacher drbd_bm_get_lel(mdev, c->word_offset, num_words, p); 1403e658983aSAndreas Gruenbacher err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0); 1404b411b363SPhilipp Reisner c->word_offset += num_words; 1405b411b363SPhilipp Reisner c->bit_offset = c->word_offset * BITS_PER_LONG; 1406b411b363SPhilipp Reisner 1407b411b363SPhilipp Reisner c->packets[1]++; 140850d0b1adSAndreas Gruenbacher c->bytes[1] += header_size + len; 1409b411b363SPhilipp Reisner 1410b411b363SPhilipp Reisner if (c->bit_offset > c->bm_bits) 1411b411b363SPhilipp Reisner c->bit_offset = c->bm_bits; 1412b411b363SPhilipp Reisner } 1413a982dd57SAndreas Gruenbacher if (!err) { 1414f70af118SAndreas Gruenbacher if (len == 0) { 1415b411b363SPhilipp Reisner INFO_bm_xfer_stats(mdev, "send", c); 1416f70af118SAndreas Gruenbacher return 0; 1417f70af118SAndreas Gruenbacher } else 1418f70af118SAndreas Gruenbacher return 1; 1419f70af118SAndreas Gruenbacher } 1420f70af118SAndreas Gruenbacher return -EIO; 1421b411b363SPhilipp Reisner } 1422b411b363SPhilipp Reisner 1423b411b363SPhilipp Reisner /* See the comment at receive_bitmap() */ 1424058820cdSAndreas Gruenbacher static int _drbd_send_bitmap(struct 
drbd_conf *mdev) 1425b411b363SPhilipp Reisner { 1426b411b363SPhilipp Reisner struct bm_xfer_ctx c; 1427f70af118SAndreas Gruenbacher int err; 1428b411b363SPhilipp Reisner 1429841ce241SAndreas Gruenbacher if (!expect(mdev->bitmap)) 1430841ce241SAndreas Gruenbacher return false; 1431b411b363SPhilipp Reisner 1432b411b363SPhilipp Reisner if (get_ldev(mdev)) { 1433b411b363SPhilipp Reisner if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { 1434b411b363SPhilipp Reisner dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); 1435b411b363SPhilipp Reisner drbd_bm_set_all(mdev); 1436b411b363SPhilipp Reisner if (drbd_bm_write(mdev)) { 1437b411b363SPhilipp Reisner /* write_bm did fail! Leave full sync flag set in Meta P_DATA 1438b411b363SPhilipp Reisner * but otherwise process as per normal - need to tell other 1439b411b363SPhilipp Reisner * side that a full resync is required! */ 1440b411b363SPhilipp Reisner dev_err(DEV, "Failed to write bitmap to disk!\n"); 1441b411b363SPhilipp Reisner } else { 1442b411b363SPhilipp Reisner drbd_md_clear_flag(mdev, MDF_FULL_SYNC); 1443b411b363SPhilipp Reisner drbd_md_sync(mdev); 1444b411b363SPhilipp Reisner } 1445b411b363SPhilipp Reisner } 1446b411b363SPhilipp Reisner put_ldev(mdev); 1447b411b363SPhilipp Reisner } 1448b411b363SPhilipp Reisner 1449b411b363SPhilipp Reisner c = (struct bm_xfer_ctx) { 1450b411b363SPhilipp Reisner .bm_bits = drbd_bm_bits(mdev), 1451b411b363SPhilipp Reisner .bm_words = drbd_bm_words(mdev), 1452b411b363SPhilipp Reisner }; 1453b411b363SPhilipp Reisner 1454b411b363SPhilipp Reisner do { 145579ed9bd0SAndreas Gruenbacher err = send_bitmap_rle_or_plain(mdev, &c); 1456f70af118SAndreas Gruenbacher } while (err > 0); 1457b411b363SPhilipp Reisner 1458f70af118SAndreas Gruenbacher return err == 0; 1459b411b363SPhilipp Reisner } 1460b411b363SPhilipp Reisner 1461b411b363SPhilipp Reisner int drbd_send_bitmap(struct drbd_conf *mdev) 1462b411b363SPhilipp Reisner { 14639f5bdc33SAndreas Gruenbacher struct drbd_socket *sock 
= &mdev->tconn->data; 14649f5bdc33SAndreas Gruenbacher int err = -1; 1465b411b363SPhilipp Reisner 14669f5bdc33SAndreas Gruenbacher mutex_lock(&sock->mutex); 14679f5bdc33SAndreas Gruenbacher if (sock->socket) 1468b411b363SPhilipp Reisner err = !_drbd_send_bitmap(mdev); 14699f5bdc33SAndreas Gruenbacher mutex_unlock(&sock->mutex); 1470b411b363SPhilipp Reisner return err; 1471b411b363SPhilipp Reisner } 14729f5bdc33SAndreas Gruenbacher 1473d4e67d7cSAndreas Gruenbacher void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) 1474b411b363SPhilipp Reisner { 14759f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 14769f5bdc33SAndreas Gruenbacher struct p_barrier_ack *p; 1477b411b363SPhilipp Reisner 14789f5bdc33SAndreas Gruenbacher if (mdev->state.conn < C_CONNECTED) 14799f5bdc33SAndreas Gruenbacher return; 1480b411b363SPhilipp Reisner 14819f5bdc33SAndreas Gruenbacher sock = &mdev->tconn->meta; 14829f5bdc33SAndreas Gruenbacher p = drbd_prepare_command(mdev, sock); 14839f5bdc33SAndreas Gruenbacher if (!p) 14849f5bdc33SAndreas Gruenbacher return; 14859f5bdc33SAndreas Gruenbacher p->barrier = barrier_nr; 14869f5bdc33SAndreas Gruenbacher p->set_size = cpu_to_be32(set_size); 14879f5bdc33SAndreas Gruenbacher drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); 1488b411b363SPhilipp Reisner } 1489b411b363SPhilipp Reisner 1490b411b363SPhilipp Reisner /** 1491b411b363SPhilipp Reisner * _drbd_send_ack() - Sends an ack packet 1492b411b363SPhilipp Reisner * @mdev: DRBD device. 1493b411b363SPhilipp Reisner * @cmd: Packet command code. 
1494b411b363SPhilipp Reisner * @sector: sector, needs to be in big endian byte order 1495b411b363SPhilipp Reisner * @blksize: size in byte, needs to be in big endian byte order 1496b411b363SPhilipp Reisner * @block_id: Id, big endian byte order 1497b411b363SPhilipp Reisner */ 1498d8763023SAndreas Gruenbacher static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, 1499d8763023SAndreas Gruenbacher u64 sector, u32 blksize, u64 block_id) 1500b411b363SPhilipp Reisner { 15019f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 15029f5bdc33SAndreas Gruenbacher struct p_block_ack *p; 1503b411b363SPhilipp Reisner 15049f5bdc33SAndreas Gruenbacher if (mdev->state.conn < C_CONNECTED) 1505a8c32aa8SAndreas Gruenbacher return -EIO; 15069f5bdc33SAndreas Gruenbacher 15079f5bdc33SAndreas Gruenbacher sock = &mdev->tconn->meta; 15089f5bdc33SAndreas Gruenbacher p = drbd_prepare_command(mdev, sock); 15099f5bdc33SAndreas Gruenbacher if (!p) 15109f5bdc33SAndreas Gruenbacher return -EIO; 15119f5bdc33SAndreas Gruenbacher p->sector = sector; 15129f5bdc33SAndreas Gruenbacher p->block_id = block_id; 15139f5bdc33SAndreas Gruenbacher p->blksize = blksize; 15149f5bdc33SAndreas Gruenbacher p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); 15159f5bdc33SAndreas Gruenbacher return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); 1516b411b363SPhilipp Reisner } 1517b411b363SPhilipp Reisner 15182b2bf214SLars Ellenberg /* dp->sector and dp->block_id already/still in network byte order, 15192b2bf214SLars Ellenberg * data_size is payload size according to dp->head, 15202b2bf214SLars Ellenberg * and may need to be corrected for digest size. 
*/ 1521a9a9994dSAndreas Gruenbacher void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, 15222b2bf214SLars Ellenberg struct p_data *dp, int data_size) 1523b411b363SPhilipp Reisner { 152488104ca4SAndreas Gruenbacher if (mdev->tconn->peer_integrity_tfm) 152588104ca4SAndreas Gruenbacher data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); 1526a9a9994dSAndreas Gruenbacher _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), 1527b411b363SPhilipp Reisner dp->block_id); 1528b411b363SPhilipp Reisner } 1529b411b363SPhilipp Reisner 1530a9a9994dSAndreas Gruenbacher void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, 1531b411b363SPhilipp Reisner struct p_block_req *rp) 1532b411b363SPhilipp Reisner { 1533a9a9994dSAndreas Gruenbacher _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); 1534b411b363SPhilipp Reisner } 1535b411b363SPhilipp Reisner 1536b411b363SPhilipp Reisner /** 1537b411b363SPhilipp Reisner * drbd_send_ack() - Sends an ack packet 1538db830c46SAndreas Gruenbacher * @mdev: DRBD device 1539db830c46SAndreas Gruenbacher * @cmd: packet command code 1540db830c46SAndreas Gruenbacher * @peer_req: peer request 1541b411b363SPhilipp Reisner */ 1542d8763023SAndreas Gruenbacher int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, 1543db830c46SAndreas Gruenbacher struct drbd_peer_request *peer_req) 1544b411b363SPhilipp Reisner { 1545dd516121SAndreas Gruenbacher return _drbd_send_ack(mdev, cmd, 1546db830c46SAndreas Gruenbacher cpu_to_be64(peer_req->i.sector), 1547db830c46SAndreas Gruenbacher cpu_to_be32(peer_req->i.size), 1548db830c46SAndreas Gruenbacher peer_req->block_id); 1549b411b363SPhilipp Reisner } 1550b411b363SPhilipp Reisner 1551b411b363SPhilipp Reisner /* This function misuses the block_id field to signal if the blocks 1552b411b363SPhilipp Reisner * are is sync or not. 
*/ 1553d8763023SAndreas Gruenbacher int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, 1554b411b363SPhilipp Reisner sector_t sector, int blksize, u64 block_id) 1555b411b363SPhilipp Reisner { 1556fa79abd8SAndreas Gruenbacher return _drbd_send_ack(mdev, cmd, 1557b411b363SPhilipp Reisner cpu_to_be64(sector), 1558b411b363SPhilipp Reisner cpu_to_be32(blksize), 1559b411b363SPhilipp Reisner cpu_to_be64(block_id)); 1560b411b363SPhilipp Reisner } 1561b411b363SPhilipp Reisner 1562b411b363SPhilipp Reisner int drbd_send_drequest(struct drbd_conf *mdev, int cmd, 1563b411b363SPhilipp Reisner sector_t sector, int size, u64 block_id) 1564b411b363SPhilipp Reisner { 15659f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 15669f5bdc33SAndreas Gruenbacher struct p_block_req *p; 1567b411b363SPhilipp Reisner 15689f5bdc33SAndreas Gruenbacher sock = &mdev->tconn->data; 15699f5bdc33SAndreas Gruenbacher p = drbd_prepare_command(mdev, sock); 15709f5bdc33SAndreas Gruenbacher if (!p) 15719f5bdc33SAndreas Gruenbacher return -EIO; 15729f5bdc33SAndreas Gruenbacher p->sector = cpu_to_be64(sector); 15739f5bdc33SAndreas Gruenbacher p->block_id = block_id; 15749f5bdc33SAndreas Gruenbacher p->blksize = cpu_to_be32(size); 15759f5bdc33SAndreas Gruenbacher return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); 1576b411b363SPhilipp Reisner } 1577b411b363SPhilipp Reisner 1578d8763023SAndreas Gruenbacher int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, 1579d8763023SAndreas Gruenbacher void *digest, int digest_size, enum drbd_packet cmd) 1580b411b363SPhilipp Reisner { 15819f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 15829f5bdc33SAndreas Gruenbacher struct p_block_req *p; 1583b411b363SPhilipp Reisner 15849f5bdc33SAndreas Gruenbacher /* FIXME: Put the digest into the preallocated socket buffer. 
*/ 1585b411b363SPhilipp Reisner 15869f5bdc33SAndreas Gruenbacher sock = &mdev->tconn->data; 15879f5bdc33SAndreas Gruenbacher p = drbd_prepare_command(mdev, sock); 15889f5bdc33SAndreas Gruenbacher if (!p) 15899f5bdc33SAndreas Gruenbacher return -EIO; 15909f5bdc33SAndreas Gruenbacher p->sector = cpu_to_be64(sector); 15919f5bdc33SAndreas Gruenbacher p->block_id = ID_SYNCER /* unused */; 15929f5bdc33SAndreas Gruenbacher p->blksize = cpu_to_be32(size); 15939f5bdc33SAndreas Gruenbacher return drbd_send_command(mdev, sock, cmd, sizeof(*p), 15949f5bdc33SAndreas Gruenbacher digest, digest_size); 1595b411b363SPhilipp Reisner } 1596b411b363SPhilipp Reisner 1597b411b363SPhilipp Reisner int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) 1598b411b363SPhilipp Reisner { 15999f5bdc33SAndreas Gruenbacher struct drbd_socket *sock; 16009f5bdc33SAndreas Gruenbacher struct p_block_req *p; 1601b411b363SPhilipp Reisner 16029f5bdc33SAndreas Gruenbacher sock = &mdev->tconn->data; 16039f5bdc33SAndreas Gruenbacher p = drbd_prepare_command(mdev, sock); 16049f5bdc33SAndreas Gruenbacher if (!p) 16059f5bdc33SAndreas Gruenbacher return -EIO; 16069f5bdc33SAndreas Gruenbacher p->sector = cpu_to_be64(sector); 16079f5bdc33SAndreas Gruenbacher p->block_id = ID_SYNCER /* unused */; 16089f5bdc33SAndreas Gruenbacher p->blksize = cpu_to_be32(size); 16099f5bdc33SAndreas Gruenbacher return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); 1610b411b363SPhilipp Reisner } 1611b411b363SPhilipp Reisner 1612b411b363SPhilipp Reisner /* called on sndtimeo 161381e84650SAndreas Gruenbacher * returns false if we should retry, 161481e84650SAndreas Gruenbacher * true if we think connection is dead 1615b411b363SPhilipp Reisner */ 16161a7ba646SPhilipp Reisner static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock) 1617b411b363SPhilipp Reisner { 1618b411b363SPhilipp Reisner int drop_it; 1619b411b363SPhilipp Reisner /* long elapsed = (long)(jiffies - 
mdev->last_received); */ 1620b411b363SPhilipp Reisner 16211a7ba646SPhilipp Reisner drop_it = tconn->meta.socket == sock 16221a7ba646SPhilipp Reisner || !tconn->asender.task 16231a7ba646SPhilipp Reisner || get_t_state(&tconn->asender) != RUNNING 1624bbeb641cSPhilipp Reisner || tconn->cstate < C_WF_REPORT_PARAMS; 1625b411b363SPhilipp Reisner 1626b411b363SPhilipp Reisner if (drop_it) 162781e84650SAndreas Gruenbacher return true; 1628b411b363SPhilipp Reisner 16291a7ba646SPhilipp Reisner drop_it = !--tconn->ko_count; 1630b411b363SPhilipp Reisner if (!drop_it) { 16311a7ba646SPhilipp Reisner conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n", 16321a7ba646SPhilipp Reisner current->comm, current->pid, tconn->ko_count); 16331a7ba646SPhilipp Reisner request_ping(tconn); 1634b411b363SPhilipp Reisner } 1635b411b363SPhilipp Reisner 1636b411b363SPhilipp Reisner return drop_it; /* && (mdev->state == R_PRIMARY) */; 1637b411b363SPhilipp Reisner } 1638b411b363SPhilipp Reisner 16391a7ba646SPhilipp Reisner static void drbd_update_congested(struct drbd_tconn *tconn) 16409e204cddSAndreas Gruenbacher { 16411a7ba646SPhilipp Reisner struct sock *sk = tconn->data.socket->sk; 16429e204cddSAndreas Gruenbacher if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) 16431a7ba646SPhilipp Reisner set_bit(NET_CONGESTED, &tconn->flags); 16449e204cddSAndreas Gruenbacher } 16459e204cddSAndreas Gruenbacher 1646b411b363SPhilipp Reisner /* The idea of sendpage seems to be to put some kind of reference 1647b411b363SPhilipp Reisner * to the page into the skb, and to hand it over to the NIC. In 1648b411b363SPhilipp Reisner * this process get_page() gets called. 1649b411b363SPhilipp Reisner * 1650b411b363SPhilipp Reisner * As soon as the page was really sent over the network put_page() 1651b411b363SPhilipp Reisner * gets called by some part of the network layer. [ NIC driver? ] 1652b411b363SPhilipp Reisner * 1653b411b363SPhilipp Reisner * [ get_page() / put_page() increment/decrement the count. 
If count 1654b411b363SPhilipp Reisner * reaches 0 the page will be freed. ] 1655b411b363SPhilipp Reisner * 1656b411b363SPhilipp Reisner * This works nicely with pages from FSs. 1657b411b363SPhilipp Reisner * But this means that in protocol A we might signal IO completion too early! 1658b411b363SPhilipp Reisner * 1659b411b363SPhilipp Reisner * In order not to corrupt data during a resync we must make sure 1660b411b363SPhilipp Reisner * that we do not reuse our own buffer pages (EEs) to early, therefore 1661b411b363SPhilipp Reisner * we have the net_ee list. 1662b411b363SPhilipp Reisner * 1663b411b363SPhilipp Reisner * XFS seems to have problems, still, it submits pages with page_count == 0! 1664b411b363SPhilipp Reisner * As a workaround, we disable sendpage on pages 1665b411b363SPhilipp Reisner * with page_count == 0 or PageSlab. 1666b411b363SPhilipp Reisner */ 1667b411b363SPhilipp Reisner static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, 1668ba11ad9aSLars Ellenberg int offset, size_t size, unsigned msg_flags) 1669b411b363SPhilipp Reisner { 1670b987427bSAndreas Gruenbacher struct socket *socket; 1671b987427bSAndreas Gruenbacher void *addr; 1672b987427bSAndreas Gruenbacher int err; 1673b987427bSAndreas Gruenbacher 1674b987427bSAndreas Gruenbacher socket = mdev->tconn->data.socket; 1675b987427bSAndreas Gruenbacher addr = kmap(page) + offset; 1676b987427bSAndreas Gruenbacher err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags); 1677b411b363SPhilipp Reisner kunmap(page); 1678b987427bSAndreas Gruenbacher if (!err) 1679b411b363SPhilipp Reisner mdev->send_cnt += size >> 9; 1680b987427bSAndreas Gruenbacher return err; 1681b411b363SPhilipp Reisner } 1682b411b363SPhilipp Reisner 1683b411b363SPhilipp Reisner static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, 1684ba11ad9aSLars Ellenberg int offset, size_t size, unsigned msg_flags) 1685b411b363SPhilipp Reisner { 168688b390ffSAndreas Gruenbacher struct socket *socket = 
mdev->tconn->data.socket; 1687b411b363SPhilipp Reisner mm_segment_t oldfs = get_fs(); 1688b411b363SPhilipp Reisner int len = size; 168988b390ffSAndreas Gruenbacher int err = -EIO; 1690b411b363SPhilipp Reisner 1691b411b363SPhilipp Reisner /* e.g. XFS meta- & log-data is in slab pages, which have a 1692b411b363SPhilipp Reisner * page_count of 0 and/or have PageSlab() set. 1693b411b363SPhilipp Reisner * we cannot use send_page for those, as that does get_page(); 1694b411b363SPhilipp Reisner * put_page(); and would cause either a VM_BUG directly, or 1695b411b363SPhilipp Reisner * __page_cache_release a page that would actually still be referenced 1696b411b363SPhilipp Reisner * by someone, leading to some obscure delayed Oops somewhere else. */ 1697b411b363SPhilipp Reisner if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) 169888b390ffSAndreas Gruenbacher return _drbd_no_send_page(mdev, page, offset, size, msg_flags); 1699b411b363SPhilipp Reisner 1700ba11ad9aSLars Ellenberg msg_flags |= MSG_NOSIGNAL; 17011a7ba646SPhilipp Reisner drbd_update_congested(mdev->tconn); 1702b411b363SPhilipp Reisner set_fs(KERNEL_DS); 1703b411b363SPhilipp Reisner do { 170488b390ffSAndreas Gruenbacher int sent; 170588b390ffSAndreas Gruenbacher 170688b390ffSAndreas Gruenbacher sent = socket->ops->sendpage(socket, page, offset, len, msg_flags); 170788b390ffSAndreas Gruenbacher if (sent <= 0) { 1708b411b363SPhilipp Reisner if (sent == -EAGAIN) { 170988b390ffSAndreas Gruenbacher if (we_should_drop_the_connection(mdev->tconn, socket)) 1710b411b363SPhilipp Reisner break; 1711b411b363SPhilipp Reisner continue; 1712b411b363SPhilipp Reisner } 1713b411b363SPhilipp Reisner dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", 1714b411b363SPhilipp Reisner __func__, (int)size, len, sent); 171588b390ffSAndreas Gruenbacher if (sent < 0) 171688b390ffSAndreas Gruenbacher err = sent; 1717b411b363SPhilipp Reisner break; 1718b411b363SPhilipp Reisner } 1719b411b363SPhilipp Reisner len -= sent; 
		/* continuation of _drbd_send_page(): advance past the bytes
		 * the last sendpage call actually pushed out */
		offset += sent;
	} while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
	set_fs(oldfs);
	clear_bit(NET_CONGESTED, &mdev->tconn->flags);

	/* success iff the whole range went out; account sectors sent */
	if (len == 0) {
		err = 0;
		mdev->send_cnt += size >> 9;
	}
	return err;
}

/*
 * Send all pages of a bio over the data socket, copying each segment
 * into the socket buffers via _drbd_no_send_page().
 * Returns 0 on success or a negative error code.
 */
static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{
	struct bio_vec *bvec;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
		int err;

		err = _drbd_no_send_page(mdev, bvec->bv_page,
					 bvec->bv_offset, bvec->bv_len,
					 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Zero-copy variant of _drbd_send_bio(): hands the bio's pages to
 * _drbd_send_page() (sendpage) instead of copying the payload.
 * Returns 0 on success or a negative error code.
 */
static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{
	struct bio_vec *bvec;
	int i;
	/* hint all but last page with MSG_MORE */
	__bio_for_each_segment(bvec, bio, i, 0) {
		int err;

		err = _drbd_send_page(mdev, bvec->bv_page,
				      bvec->bv_offset, bvec->bv_len,
				      i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Send the page chain of a peer request (peer_req->i.size bytes total)
 * zero-copy over the data socket.
 * Returns 0 on success or a negative error code.
 */
static int _drbd_send_zc_ee(struct drbd_conf *mdev,
			    struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned len = peer_req->i.size;
	int err;

	/* hint all but last page with MSG_MORE */
	page_chain_for_each(page) {
		unsigned l = min_t(unsigned, len, PAGE_SIZE);

		err = _drbd_send_page(mdev, page, 0, l,
				      page_chain_next(page) ? MSG_MORE : 0);
		if (err)
			return err;
		len -= l;
	}
	return 0;
}

/*
 * Translate bio request flags (bi_rw) into on-the-wire DP_* flags.
 * Peers speaking protocol < 95 only understand DP_RW_SYNC.
 */
static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
{
	if (mdev->tconn->agreed_pro_version >= 95)
		return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
			(bi_rw & REQ_FUA ? DP_FUA : 0) |
			(bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
			(bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
	else
		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}

/* Used to send write requests
 * R_PRIMARY -> Peer	(P_DATA)
 */
int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
{
	struct drbd_socket *sock;
	struct p_data *p;
	unsigned int dp_flags = 0;
	int dgs;
	int err;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	/* digest size, if a data-integrity hash is configured */
	dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;

	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(req->i.sector);
	p->block_id = (unsigned long)req;
	p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq));
	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
	if (mdev->state.conn >= C_SYNC_SOURCE &&
	    mdev->state.conn <= C_PAUSED_SYNC_T)
		dp_flags |= DP_MAY_SET_IN_SYNC;
	if (mdev->tconn->agreed_pro_version >= 100) {
		if (req->rq_state & RQ_EXP_RECEIVE_ACK)
			dp_flags |= DP_SEND_RECEIVE_ACK;
		if (req->rq_state & RQ_EXP_WRITE_ACK)
			dp_flags |= DP_SEND_WRITE_ACK;
	}
	p->dp_flags = cpu_to_be32(dp_flags);
	if (dgs)
		drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1);
	err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
	if (!err) {
		/* For protocol A, we have to memcpy the payload into
		 * socket buffers, as we may complete right away
		 * as soon as we handed it over to tcp, at which point the data
		 * pages may become invalid.
		 *
		 * For data-integrity enabled, we copy it as well, so we can be
		 * sure that even if the bio pages may still be modified, it
		 * won't change the data on the wire, thus if the digest checks
		 * out ok after sending on this side, but does not fit on the
		 * receiving side, we sure have detected corruption elsewhere.
		 */
		if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs)
			err = _drbd_send_bio(mdev, req->master_bio);
		else
			err = _drbd_send_zc_bio(mdev, req->master_bio);

		/* double check digest, sometimes buffers have been modified in flight. */
		if (dgs > 0 && dgs <= 64) {
			/* 64 byte, 512 bit, is the largest digest size
			 * currently supported in kernel crypto. */
			unsigned char digest[64];
			drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest);
			if (memcmp(p + 1, digest, dgs)) {
				dev_warn(DEV,
					"Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
					(unsigned long long)req->i.sector, req->i.size);
			}
		} /* else if (dgs > 64) {
		     ... Be noisy about digest too large ...
		} */
	}
	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */

	return err;
}

/* answer packet, used to send data back for read requests:
 *  Peer          -> (diskless) R_PRIMARY  (P_DATA_REPLY)
 *  C_SYNC_SOURCE -> C_SYNC_TARGET         (P_RS_DATA_REPLY)
 */
int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd,
		    struct drbd_peer_request *peer_req)
{
	struct drbd_socket *sock;
	struct p_data *p;
	int err;
	int dgs;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);

	/* digest size, if a data-integrity hash is configured */
	dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0;

	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(peer_req->i.sector);
	p->block_id = peer_req->block_id;
	p->seq_num = 0;  /* unused */
	if (dgs)
		drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1);
	err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size);
	if (!err)
		err = _drbd_send_zc_ee(mdev, peer_req);
	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */

	return err;
}

/*
 * Tell the peer that a request's range is out of sync (P_OUT_OF_SYNC).
 * Returns 0 on success or a negative error code.
 */
int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req)
{
	struct drbd_socket *sock;
	struct p_block_desc *p;

	sock = &mdev->tconn->data;
	p = drbd_prepare_command(mdev, sock);
	if (!p)
		return -EIO;
	p->sector = cpu_to_be64(req->i.sector);
	p->blksize = cpu_to_be32(req->i.size);
	return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0);
}

/*
  drbd_send distinguishes two cases:

  Packets sent via the data
  socket "sock"
  and packets sent via the meta data socket "msock"

		    sock                      msock
  -----------------+-------------------------+------------------------------
  timeout           conf.timeout / 2          conf.timeout / 2
  timeout action    send a ping via msock     Abort communication
					      and close all sockets
*/

/*
 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
 *
 * Loops until the whole buffer is out, the connection is dropped, or a
 * non-retryable error occurs.  Returns the number of bytes sent, or a
 * negative error code / 0 on failure (see the rv check at the bottom).
 */
int drbd_send(struct drbd_tconn *tconn, struct socket *sock,
	      void *buf, size_t size, unsigned msg_flags)
{
	struct kvec iov;
	struct msghdr msg;
	int rv, sent = 0;

	if (!sock)
		return -EBADR;

	/* THINK  if (signal_pending) return ... ? */

	iov.iov_base = buf;
	iov.iov_len  = size;

	msg.msg_name       = NULL;
	msg.msg_namelen    = 0;
	msg.msg_control    = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags      = msg_flags | MSG_NOSIGNAL;

	/* on the data socket: refresh ko_count and mark us congested */
	if (sock == tconn->data.socket) {
		rcu_read_lock();
		tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count;
		rcu_read_unlock();
		drbd_update_congested(tconn);
	}
	do {
		/* STRANGE
		 * tcp_sendmsg does _not_ use its size parameter at all ?
		 *
		 * -EAGAIN on timeout, -EINTR on signal.
		 */
		/* THINK
		 * do we need to block DRBD_SIG if sock == &meta.socket ??
		 * otherwise wake_asender() might interrupt some send_*Ack !
		 */
		rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
		if (rv == -EAGAIN) {
			if (we_should_drop_the_connection(tconn, sock))
				break;
			else
				continue;
		}
		if (rv == -EINTR) {
			flush_signals(current);
			rv = 0;
		}
		if (rv < 0)
			break;
		sent += rv;
		iov.iov_base += rv;
		iov.iov_len  -= rv;
	} while (sent < size);

	if (sock == tconn->data.socket)
		clear_bit(NET_CONGESTED, &tconn->flags);

	/* rv is always assigned: the do-while body runs at least once */
	if (rv <= 0) {
		if (rv != -EAGAIN) {
			conn_err(tconn, "%s_sendmsg returned %d\n",
				 sock == tconn->meta.socket ? "msock" : "sock",
				 rv);
			conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
		} else
			conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD);
	}

	return sent;
}

/**
 * drbd_send_all  -  Send an entire buffer
 *
 * Returns 0 upon success and a negative error value otherwise.
 */
int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer,
		  size_t size, unsigned msg_flags)
{
	int err;

	err = drbd_send(tconn, sock, buffer, size, msg_flags);
	if (err < 0)
		return err;
	/* drbd_send() returns bytes sent; a short send counts as failure */
	if (err != size)
		return -EIO;
	return 0;
}

/*
 * Block device open.  Writes are refused unless we are Primary; reads
 * on a Secondary are refused too unless allow_oos permits them.
 */
static int drbd_open(struct block_device *bdev, fmode_t mode)
{
	struct drbd_conf *mdev = bdev->bd_disk->private_data;
	unsigned long flags;
	int rv = 0;

	mutex_lock(&drbd_main_mutex);
	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	/* to have a stable mdev->state.role
	 * and no race with updating open_cnt */

	if (mdev->state.role != R_PRIMARY) {
		if (mode & FMODE_WRITE)
			rv = -EROFS;
		else if (!allow_oos)
			rv = -EMEDIUMTYPE;
	}

	if (!rv)
		mdev->open_cnt++;
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
	mutex_unlock(&drbd_main_mutex);

	return rv;
}

/* Block device release: drop one open reference. */
static int drbd_release(struct gendisk *gd, fmode_t mode)
{
	struct drbd_conf *mdev = gd->private_data;
	mutex_lock(&drbd_main_mutex);
	mdev->open_cnt--;
	mutex_unlock(&drbd_main_mutex);
	return 0;
}

/* Reset the device state union to Secondary/StandAlone/Diskless defaults. */
static void drbd_set_defaults(struct drbd_conf *mdev)
{
	/* Beware! The actual layout differs
	 * between big endian and little endian */
	mdev->state = (union drbd_dev_state) {
		{ .role = R_SECONDARY,
		  .peer = R_UNKNOWN,
		  .conn = C_STANDALONE,
		  .disk = D_DISKLESS,
		  .pdsk = D_UNKNOWN,
		} };
}

/* One-time initialization of all mdev fields after allocation. */
void drbd_init_set_defaults(struct drbd_conf *mdev)
{
	/* the memset(,0,) did most of this.
	 * note: only assignments, no allocation in here */

	drbd_set_defaults(mdev);

	atomic_set(&mdev->ap_bio_cnt, 0);
	atomic_set(&mdev->ap_pending_cnt, 0);
	atomic_set(&mdev->rs_pending_cnt, 0);
	atomic_set(&mdev->unacked_cnt, 0);
	atomic_set(&mdev->local_cnt, 0);
	atomic_set(&mdev->pp_in_use_by_net, 0);
	atomic_set(&mdev->rs_sect_in, 0);
	atomic_set(&mdev->rs_sect_ev, 0);
	atomic_set(&mdev->ap_in_flight, 0);
	atomic_set(&mdev->md_io_in_use, 0);

	mutex_init(&mdev->own_state_mutex);
	mdev->state_mutex = &mdev->own_state_mutex;

	spin_lock_init(&mdev->al_lock);
	spin_lock_init(&mdev->peer_seq_lock);
	spin_lock_init(&mdev->epoch_lock);

	INIT_LIST_HEAD(&mdev->active_ee);
	INIT_LIST_HEAD(&mdev->sync_ee);
	INIT_LIST_HEAD(&mdev->done_ee);
	INIT_LIST_HEAD(&mdev->read_ee);
	INIT_LIST_HEAD(&mdev->net_ee);
	INIT_LIST_HEAD(&mdev->resync_reads);
	INIT_LIST_HEAD(&mdev->resync_work.list);
	INIT_LIST_HEAD(&mdev->unplug_work.list);
	INIT_LIST_HEAD(&mdev->go_diskless.list);
	INIT_LIST_HEAD(&mdev->md_sync_work.list);
	INIT_LIST_HEAD(&mdev->start_resync_work.list);
	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);

	/* work item callbacks */
	mdev->resync_work.cb  = w_resync_timer;
	mdev->unplug_work.cb  = w_send_write_hint;
	mdev->go_diskless.cb  = w_go_diskless;
	mdev->md_sync_work.cb = w_md_sync;
	mdev->bm_io_work.w.cb = w_bitmap_io;
	mdev->start_resync_work.cb = w_start_resync;

	/* back-pointers from each work item to its device */
	mdev->resync_work.mdev  = mdev;
	mdev->unplug_work.mdev  = mdev;
	mdev->go_diskless.mdev  = mdev;
	mdev->md_sync_work.mdev = mdev;
	mdev->bm_io_work.w.mdev = mdev;
	mdev->start_resync_work.mdev = mdev;

	/* timers */
	init_timer(&mdev->resync_timer);
	init_timer(&mdev->md_sync_timer);
	init_timer(&mdev->start_resync_timer);
	init_timer(&mdev->request_timer);
	mdev->resync_timer.function = resync_timer_fn;
	mdev->resync_timer.data = (unsigned long) mdev;
	mdev->md_sync_timer.function = md_sync_timer_fn;
	mdev->md_sync_timer.data = (unsigned long) mdev;
	mdev->start_resync_timer.function = start_resync_timer_fn;
	mdev->start_resync_timer.data = (unsigned long) mdev;
	mdev->request_timer.function = request_timer_fn;
	mdev->request_timer.data = (unsigned long) mdev;

	init_waitqueue_head(&mdev->misc_wait);
	init_waitqueue_head(&mdev->state_wait);
	/* continuation of drbd_init_set_defaults() */
	init_waitqueue_head(&mdev->ee_wait);
	init_waitqueue_head(&mdev->al_wait);
	init_waitqueue_head(&mdev->seq_wait);

	mdev->write_ordering = WO_bdev_flush;
	mdev->resync_wenr = LC_FREE;
	mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
	mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
}

/*
 * Reset an mdev back to its pristine (defaults) state: zero the
 * counters, release the bitmap and backing device, and assert that all
 * epoch-entry lists and work queues have drained.
 */
void drbd_mdev_cleanup(struct drbd_conf *mdev)
{
	int i;
	if (mdev->tconn->receiver.t_state != NONE)
		dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
				mdev->tconn->receiver.t_state);

	/* no need to lock it, I'm the only thread alive */
	if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
		dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
	mdev->al_writ_cnt  =
	mdev->bm_writ_cnt  =
	mdev->read_cnt     =
	mdev->recv_cnt     =
	mdev->send_cnt     =
	mdev->writ_cnt     =
	mdev->p_size       =
	mdev->rs_start     =
	mdev->rs_total     =
	mdev->rs_failed    = 0;
	mdev->rs_last_events = 0;
	mdev->rs_last_sect_ev = 0;
	for (i = 0; i < DRBD_SYNC_MARKS; i++) {
		mdev->rs_mark_left[i] = 0;
		mdev->rs_mark_time[i] = 0;
	}
	D_ASSERT(mdev->tconn->net_conf == NULL);

	drbd_set_my_capacity(mdev, 0);
	if (mdev->bitmap) {
		/* maybe never allocated. */
		drbd_bm_resize(mdev, 0, 1);
		drbd_bm_cleanup(mdev);
	}

	drbd_free_bc(mdev->ldev);
	mdev->ldev = NULL;

	clear_bit(AL_SUSPENDED, &mdev->flags);

	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));
	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->net_ee));
	D_ASSERT(list_empty(&mdev->resync_reads));
	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
	D_ASSERT(list_empty(&mdev->tconn->meta.work.q));
	D_ASSERT(list_empty(&mdev->resync_work.list));
	D_ASSERT(list_empty(&mdev->unplug_work.list));
	D_ASSERT(list_empty(&mdev->go_diskless.list));

	drbd_set_defaults(mdev);
}


/* Free the page pool, mempools, bio set, and slab caches (NULL-safe). */
static void drbd_destroy_mempools(void)
{
	struct page *page;

	/* walk the singly-linked page pool (next pointer stashed in
	 * page_private) and free every page */
	while (drbd_pp_pool) {
		page = drbd_pp_pool;
		drbd_pp_pool = (struct page *)page_private(page);
		__free_page(page);
		drbd_pp_vacant--;
	}

	/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */

	if (drbd_md_io_bio_set)
		bioset_free(drbd_md_io_bio_set);
	if (drbd_md_io_page_pool)
		mempool_destroy(drbd_md_io_page_pool);
	if (drbd_ee_mempool)
		mempool_destroy(drbd_ee_mempool);
	if (drbd_request_mempool)
		mempool_destroy(drbd_request_mempool);
	if (drbd_ee_cache)
		kmem_cache_destroy(drbd_ee_cache);
	if (drbd_request_cache)
		kmem_cache_destroy(drbd_request_cache);
	if (drbd_bm_ext_cache)
		kmem_cache_destroy(drbd_bm_ext_cache);
	if (drbd_al_ext_cache)
		kmem_cache_destroy(drbd_al_ext_cache);

	drbd_md_io_bio_set   = NULL;
	drbd_md_io_page_pool = NULL;
	drbd_ee_mempool      = NULL;
	drbd_request_mempool = NULL;
	drbd_ee_cache        = NULL;
	drbd_request_cache   = NULL;
	drbd_bm_ext_cache    = NULL;
	drbd_al_ext_cache    = NULL;

	return;
}

/* Allocate the slab caches, mempools, bio set, and the page pool.
 * Returns 0 on success, -ENOMEM on failure (partial allocations are
 * cleaned up via drbd_destroy_mempools()). */
static int drbd_create_mempools(void)
{
	struct page *page;
	/* pool size scales with minor_count: enough pages for one maximally
	 * sized bio per configured minor */
	const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
	int i;

	/* prepare our caches and mempools */
	drbd_request_mempool = NULL;
	drbd_ee_cache        = NULL;
	drbd_request_cache   = NULL;
	drbd_bm_ext_cache    = NULL;
	drbd_al_ext_cache    = NULL;
	drbd_pp_pool         = NULL;
	drbd_md_io_page_pool = NULL;
	drbd_md_io_bio_set   = NULL;

	/* caches */
	drbd_request_cache = kmem_cache_create(
		"drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
	if (drbd_request_cache == NULL)
		goto Enomem;

	drbd_ee_cache = kmem_cache_create(
		"drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL);
	if (drbd_ee_cache == NULL)
		goto Enomem;

	drbd_bm_ext_cache = kmem_cache_create(
		"drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
	if (drbd_bm_ext_cache == NULL)
		goto Enomem;

	drbd_al_ext_cache = kmem_cache_create(
		"drbd_al", sizeof(struct lc_element), 0, 0, NULL);
	if (drbd_al_ext_cache == NULL)
		goto Enomem;

	/* mempools */
	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
	if (drbd_md_io_bio_set == NULL)
		goto Enomem;

	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
	if (drbd_md_io_page_pool == NULL)
		goto Enomem;

	drbd_request_mempool = mempool_create(number,
		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
	if (drbd_request_mempool == NULL)
		goto Enomem;

	drbd_ee_mempool = mempool_create(number,
		mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
	if (drbd_ee_mempool == NULL)
		goto Enomem;

	/* drbd's page pool: a LIFO list of pages chained via page_private() */
	spin_lock_init(&drbd_pp_lock);

	for (i = 0; i < number; i++) {
		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			goto Enomem;
		set_page_private(page, (unsigned long)drbd_pp_pool);
		drbd_pp_pool = page;
	}
	drbd_pp_vacant = number;

	return 0;

Enomem:
	drbd_destroy_mempools(); /* in case we allocated some */
	return -ENOMEM;
}

/* Reboot-notifier callback; currently a no-op placeholder. */
static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
			   void *unused)
{
Reisner /* just so we have it. you never know what interesting things we 2321b411b363SPhilipp Reisner * might want to do here some day... 2322b411b363SPhilipp Reisner */ 2323b411b363SPhilipp Reisner 2324b411b363SPhilipp Reisner return NOTIFY_DONE; 2325b411b363SPhilipp Reisner } 2326b411b363SPhilipp Reisner 2327b411b363SPhilipp Reisner static struct notifier_block drbd_notifier = { 2328b411b363SPhilipp Reisner .notifier_call = drbd_notify_sys, 2329b411b363SPhilipp Reisner }; 2330b411b363SPhilipp Reisner 23317721f567SAndreas Gruenbacher static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) 2332b411b363SPhilipp Reisner { 2333b411b363SPhilipp Reisner int rr; 2334b411b363SPhilipp Reisner 23357721f567SAndreas Gruenbacher rr = drbd_free_peer_reqs(mdev, &mdev->active_ee); 2336b411b363SPhilipp Reisner if (rr) 2337b411b363SPhilipp Reisner dev_err(DEV, "%d EEs in active list found!\n", rr); 2338b411b363SPhilipp Reisner 23397721f567SAndreas Gruenbacher rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee); 2340b411b363SPhilipp Reisner if (rr) 2341b411b363SPhilipp Reisner dev_err(DEV, "%d EEs in sync list found!\n", rr); 2342b411b363SPhilipp Reisner 23437721f567SAndreas Gruenbacher rr = drbd_free_peer_reqs(mdev, &mdev->read_ee); 2344b411b363SPhilipp Reisner if (rr) 2345b411b363SPhilipp Reisner dev_err(DEV, "%d EEs in read list found!\n", rr); 2346b411b363SPhilipp Reisner 23477721f567SAndreas Gruenbacher rr = drbd_free_peer_reqs(mdev, &mdev->done_ee); 2348b411b363SPhilipp Reisner if (rr) 2349b411b363SPhilipp Reisner dev_err(DEV, "%d EEs in done list found!\n", rr); 2350b411b363SPhilipp Reisner 23517721f567SAndreas Gruenbacher rr = drbd_free_peer_reqs(mdev, &mdev->net_ee); 2352b411b363SPhilipp Reisner if (rr) 2353b411b363SPhilipp Reisner dev_err(DEV, "%d EEs in net list found!\n", rr); 2354b411b363SPhilipp Reisner } 2355b411b363SPhilipp Reisner 2356774b3055SPhilipp Reisner /* caution. no locking. 
 */
/* kref release function for a drbd minor: frees everything the device
 * owns and finally drops the reference it held on its connection. */
void drbd_minor_destroy(struct kref *kref)
{
	struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
	/* keep a local copy: kfree(mdev) below invalidates mdev->tconn */
	struct drbd_tconn *tconn = mdev->tconn;

	del_timer_sync(&mdev->request_timer);

	/* paranoia asserts */
	D_ASSERT(mdev->open_cnt == 0);
	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
	/* end paranoia asserts */

	/* cleanup stuff that may have been allocated during
	 * device (re-)configuration or state changes */

	if (mdev->this_bdev)
		bdput(mdev->this_bdev);

	drbd_free_bc(mdev->ldev);
	mdev->ldev = NULL;

	drbd_release_all_peer_reqs(mdev);

	lc_destroy(mdev->act_log);
	lc_destroy(mdev->resync);

	kfree(mdev->p_uuid);
	/* mdev->p_uuid = NULL; */

	kfree(mdev->current_epoch);
	if (mdev->bitmap) /* should no longer be there. */
		drbd_bm_cleanup(mdev);
	__free_page(mdev->md_io_page);
	put_disk(mdev->vdisk);
	blk_cleanup_queue(mdev->rq_queue);
	kfree(mdev->rs_plan_s);
	kfree(mdev);

	/* drop the connection reference taken in conn_new_minor() */
	kref_put(&tconn->kref, &conn_destroy);
}

/* Module-exit teardown: unregister interfaces, destroy all minors and
 * connections, then release the global pools.  Ordering matters; see the
 * comment about /proc removal below. */
static void drbd_cleanup(void)
{
	unsigned int i;
	struct drbd_conf *mdev;
	struct drbd_tconn *tconn, *tmp;

	unregister_reboot_notifier(&drbd_notifier);

	/* first remove proc,
	 * drbdsetup uses it's presence to detect
	 * whether DRBD is loaded.
	 * If we would get stuck in proc removal,
	 * but have netlink already deregistered,
	 * some drbdsetup commands may wait forever
	 * for an answer.
	 */
	if (drbd_proc)
		remove_proc_entry("drbd", NULL);

	drbd_genl_unregister();

	/* destroy every minor; the last kref_put frees the device */
	idr_for_each_entry(&minors, mdev, i) {
		idr_remove(&minors, mdev_to_minor(mdev));
		idr_remove(&mdev->tconn->volumes, mdev->vnr);
		del_gendisk(mdev->vdisk);
		/* synchronize_rcu(); No other threads running at this point */
		kref_put(&mdev->kref, &drbd_minor_destroy);
	}

	/* not _rcu since, no other updater anymore. Genl already unregistered */
	list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
		list_del(&tconn->all_tconn); /* not _rcu no proc, not other threads */
		/* synchronize_rcu(); */
		kref_put(&tconn->kref, &conn_destroy);
	}

	drbd_destroy_mempools();
	unregister_blkdev(DRBD_MAJOR, "drbd");

	idr_destroy(&minors);

	printk(KERN_INFO "drbd: module cleanup done.\n");
}

/**
 * drbd_congested() - Callback for pdflush
 * @congested_data:	User data
 * @bdi_bits:		Bits pdflush is currently interested in
 *
 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
 */
static int drbd_congested(void *congested_data, int bdi_bits)
{
	struct drbd_conf *mdev = congested_data;
	struct request_queue *q;
	char reason = '-';	/* single-char tag recorded for /proc display */
	int r = 0;

	if (!may_inc_ap_bio(mdev)) {
		/* DRBD has frozen IO */
		r = bdi_bits;
		reason = 'd';
		goto out;
	}

	/* 'b': the local backing device itself reports congestion */
	if (get_ldev(mdev)) {
		q = bdev_get_queue(mdev->ldev->backing_bdev);
		r = bdi_congested(&q->backing_dev_info, bdi_bits);
		put_ldev(mdev);
		if (r)
			reason = 'b';
	}

	/* 'n': the network is congested; 'a': both disk and network */
	if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) {
		r |= (1 << BDI_async_congested);
		reason = reason == 'b' ? 'a' : 'n';
	}

out:
	mdev->congestion_reason = reason;
	return r;
}

/* Initialize a drbd work queue: counting semaphore, lock and list head. */
static void drbd_init_workqueue(struct drbd_work_queue* wq)
{
	sema_init(&wq->s, 0);
	spin_lock_init(&wq->q_lock);
	INIT_LIST_HEAD(&wq->q);
}

/* Look up a connection by resource name.  On a match the connection's
 * refcount is incremented; the caller must kref_put() it.  Returns NULL
 * if the name is empty or not found. */
struct drbd_tconn *conn_get_by_name(const char *name)
{
	struct drbd_tconn *tconn;

	if (!name || !name[0])
		return NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
		if (!strcmp(tconn->name, name)) {
			kref_get(&tconn->kref);
			goto found;
		}
	}
	tconn = NULL;
found:
	rcu_read_unlock();
	return tconn;
}

/* Look up a connection by the (local, peer) address pair.  On a match the
 * connection's refcount is incremented; the caller must kref_put() it. */
struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
				     void *peer_addr, int peer_addr_len)
{
	struct drbd_tconn *tconn;

	rcu_read_lock();
	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
		if (tconn->my_addr_len == my_addr_len &&
		    tconn->peer_addr_len == peer_addr_len &&
		    !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
		    !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
			kref_get(&tconn->kref);
			goto found;
		}
	}
	tconn = NULL;
found:
	rcu_read_unlock();
	return tconn;
}

/* Allocate one page each for the socket's receive and send buffers.
 * On -ENOMEM a partially filled struct is left behind; the caller cleans
 * up with drbd_free_socket() (see the conn_create() fail path). */
static int drbd_alloc_socket(struct drbd_socket *socket)
{
	socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
	if (!socket->rbuf)
		return -ENOMEM;
	socket->sbuf = (void *) __get_free_page(GFP_KERNEL);
	if (!socket->sbuf)
		return -ENOMEM;
	return 0;
}

/* Release the socket buffer pages; free_page(0) is a no-op, so this is
 * safe on never- or partially-allocated sockets. */
static void drbd_free_socket(struct drbd_socket *socket)
{
	free_page((unsigned long) socket->sbuf);
	free_page((unsigned long) socket->rbuf);
}

/* Shut down the connection's sockets and release all crypto transforms
 * and integrity digest buffers, resetting every pointer to NULL. */
void conn_free_crypto(struct drbd_tconn *tconn)
{
	drbd_free_sock(tconn);

	crypto_free_hash(tconn->csums_tfm);
	crypto_free_hash(tconn->verify_tfm);
	crypto_free_hash(tconn->cram_hmac_tfm);
	crypto_free_hash(tconn->integrity_tfm);
	crypto_free_hash(tconn->peer_integrity_tfm);
	kfree(tconn->int_dig_in);
	kfree(tconn->int_dig_vv);

	tconn->csums_tfm = NULL;
	tconn->verify_tfm = NULL;
	tconn->cram_hmac_tfm = NULL;
	tconn->integrity_tfm = NULL;
	tconn->peer_integrity_tfm = NULL;
	tconn->int_dig_in = NULL;
	tconn->int_dig_vv = NULL;
}

/* Apply new resource options to a connection: parse the cpu-mask string,
 * copy the options, and if the mask changed, ask the connection's threads
 * to re-pin themselves.  Returns 0, -ENOMEM, or a __bitmap_parse() error. */
int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts)
{
	cpumask_var_t new_cpu_mask;
	int err;

	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL))
		return -ENOMEM;
		/*
		retcode = ERR_NOMEM;
		drbd_msg_put_info("unable to allocate cpumask");
		*/

	/* silently ignore cpu mask on UP kernel */
	if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
		/* FIXME: Get rid of constant 32 here */
		err = __bitmap_parse(res_opts->cpu_mask, 32, 0,
				     cpumask_bits(new_cpu_mask), nr_cpu_ids);
		if (err) {
			conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
			/* retcode = ERR_CPU_MASK_PARSE; */
			goto fail;
		}
	}
	tconn->res_opts = *res_opts;
	if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
		cpumask_copy(tconn->cpu_mask, new_cpu_mask);
		drbd_calc_cpu_mask(tconn);
		/* each thread re-applies the mask the next time it runs */
		tconn->receiver.reset_cpu_mask = 1;
		tconn->asender.reset_cpu_mask = 1;
		tconn->worker.reset_cpu_mask = 1;
	}
	err = 0;

fail:
	free_cpumask_var(new_cpu_mask);
	return err;

}

/* caller must be under genl_lock() */
/* Allocate and initialize a new connection object and link it into the
 * global drbd_tconns list.  Returns the new connection (refcount 1) or
 * NULL on allocation/setup failure. */
struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
{
	struct drbd_tconn *tconn;

	tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL);
	if (!tconn)
		return NULL;

	tconn->name = kstrdup(name, GFP_KERNEL);
	if (!tconn->name)
		goto fail;

	if (drbd_alloc_socket(&tconn->data))
		goto fail;
	if (drbd_alloc_socket(&tconn->meta))
		goto fail;
	if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
		goto fail;

	if (set_resource_options(tconn, res_opts))
		goto fail;

	if (!tl_init(tconn))
		goto fail;

	tconn->cstate = C_STANDALONE;
	mutex_init(&tconn->cstate_mutex);
	spin_lock_init(&tconn->req_lock);
	mutex_init(&tconn->conf_update);
	init_waitqueue_head(&tconn->ping_wait);
	idr_init(&tconn->volumes);

	drbd_init_workqueue(&tconn->data.work);
	mutex_init(&tconn->data.mutex);

	drbd_init_workqueue(&tconn->meta.work);
	mutex_init(&tconn->meta.mutex);

	drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver");
	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");

	kref_init(&tconn->kref);
	list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns);

	return tconn;

fail:
	/* NOTE(review): tl_cleanup() may run even when tl_init() was never
	 * reached; presumably it tolerates the zeroed state — verify. */
	tl_cleanup(tconn);
	free_cpumask_var(tconn->cpu_mask);
	drbd_free_socket(&tconn->meta);
	drbd_free_socket(&tconn->data);
	kfree(tconn->name);
	kfree(tconn);

	return NULL;
}

/* kref release function for a connection: frees everything conn_create()
 * allocated.  Called when the last minor/user drops its reference. */
void conn_destroy(struct kref *kref)
{
	struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref);

	idr_destroy(&tconn->volumes);

	free_cpumask_var(tconn->cpu_mask);
	drbd_free_socket(&tconn->meta);
	drbd_free_socket(&tconn->data);
	kfree(tconn->name);
	kfree(tconn->int_dig_in);
	kfree(tconn->int_dig_vv);
	kfree(tconn);
}

/* Create a new drbd minor (block device + volume) attached to @tconn.
 * Registers it in both the global minors idr and the connection's volume
 * idr.  Returns NO_ERROR or an ERR_* code; on failure every resource is
 * unwound via the chained out_* labels. */
enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
{
	struct drbd_conf *mdev;
	struct gendisk *disk;
	struct request_queue *q;
	int vnr_got = vnr;
	int minor_got = minor;
	enum drbd_ret_code err = ERR_NOMEM;

	mdev = minor_to_mdev(minor);
	if (mdev)
		return ERR_MINOR_EXISTS;

	/* GFP_KERNEL, we are outside of all write-out paths */
	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
	if (!mdev)
		return ERR_NOMEM;

	/* the minor holds a reference on its connection; dropped in
	 * drbd_minor_destroy() / the fail path below */
	kref_get(&tconn->kref);
	mdev->tconn = tconn;

	mdev->minor = minor;
	mdev->vnr = vnr;

	drbd_init_set_defaults(mdev);

	q = blk_alloc_queue(GFP_KERNEL);
	if (!q)
		goto out_no_q;
	mdev->rq_queue = q;
	q->queuedata   = mdev;

	disk = alloc_disk(1);
	if (!disk)
		goto out_no_disk;
	mdev->vdisk = disk;

	/* start read-only; switched writable on promotion to Primary */
	set_disk_ro(disk, true);

	disk->queue = q;
	disk->major = DRBD_MAJOR;
	disk->first_minor = minor;
	disk->fops = &drbd_ops;
	sprintf(disk->disk_name, "drbd%d", minor);
	disk->private_data = mdev;

	mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
	/* we have no partitions. we contain only ourselves. */
	mdev->this_bdev->bd_contains = mdev->this_bdev;

	q->backing_dev_info.congested_fn = drbd_congested;
	q->backing_dev_info.congested_data = mdev;

	blk_queue_make_request(q, drbd_make_request);
	/* Setting the max_hw_sectors to an odd value of 8kibyte here
	   This triggers a max_bio_size message upon first attach or connect */
	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
	blk_queue_merge_bvec(q, drbd_merge_bvec);
	q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */

	mdev->md_io_page = alloc_page(GFP_KERNEL);
	if (!mdev->md_io_page)
		goto out_no_io_page;

	if (drbd_bm_init(mdev))
		goto out_no_bitmap;
	mdev->read_requests = RB_ROOT;
	mdev->write_requests = RB_ROOT;

	mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
	if (!mdev->current_epoch)
		goto out_no_epoch;

	INIT_LIST_HEAD(&mdev->current_epoch->list);
	mdev->epochs = 1;

	/* old-style idr two-step API: pre-allocate, then insert at >= minor
	 * and verify we actually got the requested slot */
	if (!idr_pre_get(&minors, GFP_KERNEL))
		goto out_no_minor_idr;
	if (idr_get_new_above(&minors, mdev, minor, &minor_got))
		goto out_no_minor_idr;
	if (minor_got != minor) {
		err = ERR_MINOR_EXISTS;
		drbd_msg_put_info("requested minor exists already");
		goto out_idr_remove_minor;
	}

	if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
		goto out_idr_remove_minor;
	if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got))
		goto out_idr_remove_minor;
	if (vnr_got != vnr) {
		err = ERR_INVALID_REQUEST;
		drbd_msg_put_info("requested volume exists already");
		goto out_idr_remove_vol;
	}
	add_disk(disk);
	kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */

	/* inherit the connection state */
	mdev->state.conn = tconn->cstate;
	if (mdev->state.conn == C_WF_REPORT_PARAMS)
		drbd_connected(mdev);

	return NO_ERROR;

out_idr_remove_vol:
	idr_remove(&tconn->volumes, vnr_got);
out_idr_remove_minor:
	idr_remove(&minors, minor_got);
	synchronize_rcu();
out_no_minor_idr:
	kfree(mdev->current_epoch);
out_no_epoch:
	drbd_bm_cleanup(mdev);
out_no_bitmap:
	__free_page(mdev->md_io_page);
out_no_io_page:
	put_disk(disk);
out_no_disk:
	blk_cleanup_queue(q);
out_no_q:
	kfree(mdev);
	/* drop the connection reference taken at the top */
	kref_put(&tconn->kref, &conn_destroy);
	return err;
}

/* Module init: validate parameters, register the block major, netlink
 * family, reboot notifier, mempools and /proc entry.  On any failure
 * after the major is registered, drbd_cleanup() unwinds everything. */
int __init drbd_init(void)
{
	int err;

	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
		printk(KERN_ERR
		       "drbd: invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
		return -EINVAL;
#else
		/* built-in: can't fail module load, fall back to the default */
		minor_count = DRBD_MINOR_COUNT_DEF;
#endif
	}

	err = register_blkdev(DRBD_MAJOR, "drbd");
	if (err) {
		printk(KERN_ERR
		       "drbd: unable to register block device major %d\n",
		       DRBD_MAJOR);
		return err;
	}

	err = drbd_genl_register();
	if (err) {
		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
		goto fail;
	}


	register_reboot_notifier(&drbd_notifier);

	/*
	 * allocate all necessary structs
	 */
	err = -ENOMEM;

	init_waitqueue_head(&drbd_pp_wait);

	drbd_proc = NULL; /* play safe for drbd_cleanup */
	idr_init(&minors);

	err = drbd_create_mempools();
	if (err)
		goto fail;

	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
	if (!drbd_proc)	{
		printk(KERN_ERR "drbd: unable to register proc file\n");
		goto fail;
	}

	rwlock_init(&global_state_lock);
	INIT_LIST_HEAD(&drbd_tconns);

	printk(KERN_INFO "drbd: initialized. "
	       "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
	       API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
	printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
	printk(KERN_INFO "drbd: registered as block device major %d\n",
		DRBD_MAJOR);

	return 0; /* Success! */

fail:
	drbd_cleanup();
	if (err == -ENOMEM)
		/* currently always the case */
		printk(KERN_ERR "drbd: ran out of memory\n");
	else
		printk(KERN_ERR "drbd: initialization failure\n");
	return err;
}

/* Release a backing-device descriptor: drop both exclusively opened
 * block devices (data and meta-data) and free the struct.  NULL-safe. */
void drbd_free_bc(struct drbd_backing_dev *ldev)
{
	if (ldev == NULL)
		return;

	blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
	blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	kfree(ldev);
}

/* Shut down and release the connection's data and meta sockets, each
 * under its own mutex so concurrent senders see a consistent pointer. */
void drbd_free_sock(struct drbd_tconn *tconn)
{
	if (tconn->data.socket) {
		mutex_lock(&tconn->data.mutex);
		kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR);
		sock_release(tconn->data.socket);
		tconn->data.socket = NULL;
		mutex_unlock(&tconn->data.mutex);
	}
	if (tconn->meta.socket) {
		mutex_lock(&tconn->meta.mutex);
		kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR);
		sock_release(tconn->meta.socket);
		tconn->meta.socket = NULL;
		mutex_unlock(&tconn->meta.mutex);
	}
Reisner } 2909b411b363SPhilipp Reisner 2910b411b363SPhilipp Reisner /* meta data management */ 2911b411b363SPhilipp Reisner 2912b411b363SPhilipp Reisner struct meta_data_on_disk { 2913b411b363SPhilipp Reisner u64 la_size; /* last agreed size. */ 2914b411b363SPhilipp Reisner u64 uuid[UI_SIZE]; /* UUIDs. */ 2915b411b363SPhilipp Reisner u64 device_uuid; 2916b411b363SPhilipp Reisner u64 reserved_u64_1; 2917b411b363SPhilipp Reisner u32 flags; /* MDF */ 2918b411b363SPhilipp Reisner u32 magic; 2919b411b363SPhilipp Reisner u32 md_size_sect; 2920b411b363SPhilipp Reisner u32 al_offset; /* offset to this block */ 2921b411b363SPhilipp Reisner u32 al_nr_extents; /* important for restoring the AL */ 2922f399002eSLars Ellenberg /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ 2923b411b363SPhilipp Reisner u32 bm_offset; /* offset to the bitmap, from here */ 2924b411b363SPhilipp Reisner u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ 292599432fccSPhilipp Reisner u32 la_peer_max_bio_size; /* last peer max_bio_size */ 292699432fccSPhilipp Reisner u32 reserved_u32[3]; 2927b411b363SPhilipp Reisner 2928b411b363SPhilipp Reisner } __packed; 2929b411b363SPhilipp Reisner 2930b411b363SPhilipp Reisner /** 2931b411b363SPhilipp Reisner * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set 2932b411b363SPhilipp Reisner * @mdev: DRBD device. 2933b411b363SPhilipp Reisner */ 2934b411b363SPhilipp Reisner void drbd_md_sync(struct drbd_conf *mdev) 2935b411b363SPhilipp Reisner { 2936b411b363SPhilipp Reisner struct meta_data_on_disk *buffer; 2937b411b363SPhilipp Reisner sector_t sector; 2938b411b363SPhilipp Reisner int i; 2939b411b363SPhilipp Reisner 2940ee15b038SLars Ellenberg del_timer(&mdev->md_sync_timer); 2941ee15b038SLars Ellenberg /* timer may be rearmed by drbd_md_mark_dirty() now. 
*/ 2942b411b363SPhilipp Reisner if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) 2943b411b363SPhilipp Reisner return; 2944b411b363SPhilipp Reisner 2945b411b363SPhilipp Reisner /* We use here D_FAILED and not D_ATTACHING because we try to write 2946b411b363SPhilipp Reisner * metadata even if we detach due to a disk failure! */ 2947b411b363SPhilipp Reisner if (!get_ldev_if_state(mdev, D_FAILED)) 2948b411b363SPhilipp Reisner return; 2949b411b363SPhilipp Reisner 2950cdfda633SPhilipp Reisner buffer = drbd_md_get_buffer(mdev); 2951cdfda633SPhilipp Reisner if (!buffer) 2952cdfda633SPhilipp Reisner goto out; 2953cdfda633SPhilipp Reisner 2954b411b363SPhilipp Reisner memset(buffer, 0, 512); 2955b411b363SPhilipp Reisner 2956b411b363SPhilipp Reisner buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); 2957b411b363SPhilipp Reisner for (i = UI_CURRENT; i < UI_SIZE; i++) 2958b411b363SPhilipp Reisner buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); 2959b411b363SPhilipp Reisner buffer->flags = cpu_to_be32(mdev->ldev->md.flags); 2960d5d7ebd4SLars Ellenberg buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); 2961b411b363SPhilipp Reisner 2962b411b363SPhilipp Reisner buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); 2963b411b363SPhilipp Reisner buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); 2964b411b363SPhilipp Reisner buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements); 2965b411b363SPhilipp Reisner buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); 2966b411b363SPhilipp Reisner buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); 2967b411b363SPhilipp Reisner 2968b411b363SPhilipp Reisner buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); 296999432fccSPhilipp Reisner buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); 2970b411b363SPhilipp Reisner 2971b411b363SPhilipp Reisner D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); 2972b411b363SPhilipp Reisner sector = 
mdev->ldev->md.md_offset; 2973b411b363SPhilipp Reisner 29743fbf4d21SAndreas Gruenbacher if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 2975b411b363SPhilipp Reisner /* this was a try anyways ... */ 2976b411b363SPhilipp Reisner dev_err(DEV, "meta data update failed!\n"); 297781e84650SAndreas Gruenbacher drbd_chk_io_error(mdev, 1, true); 2978b411b363SPhilipp Reisner } 2979b411b363SPhilipp Reisner 2980b411b363SPhilipp Reisner /* Update mdev->ldev->md.la_size_sect, 2981b411b363SPhilipp Reisner * since we updated it on metadata. */ 2982b411b363SPhilipp Reisner mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); 2983b411b363SPhilipp Reisner 2984cdfda633SPhilipp Reisner drbd_md_put_buffer(mdev); 2985cdfda633SPhilipp Reisner out: 2986b411b363SPhilipp Reisner put_ldev(mdev); 2987b411b363SPhilipp Reisner } 2988b411b363SPhilipp Reisner 2989b411b363SPhilipp Reisner /** 2990b411b363SPhilipp Reisner * drbd_md_read() - Reads in the meta data super block 2991b411b363SPhilipp Reisner * @mdev: DRBD device. 2992b411b363SPhilipp Reisner * @bdev: Device from which the meta data should be read in. 2993b411b363SPhilipp Reisner * 2994116676caSAndreas Gruenbacher * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case 2995d5d7ebd4SLars Ellenberg * something goes wrong. 
2996b411b363SPhilipp Reisner */ 2997b411b363SPhilipp Reisner int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) 2998b411b363SPhilipp Reisner { 2999b411b363SPhilipp Reisner struct meta_data_on_disk *buffer; 3000d5d7ebd4SLars Ellenberg u32 magic, flags; 3001b411b363SPhilipp Reisner int i, rv = NO_ERROR; 3002b411b363SPhilipp Reisner 3003b411b363SPhilipp Reisner if (!get_ldev_if_state(mdev, D_ATTACHING)) 3004b411b363SPhilipp Reisner return ERR_IO_MD_DISK; 3005b411b363SPhilipp Reisner 3006cdfda633SPhilipp Reisner buffer = drbd_md_get_buffer(mdev); 3007cdfda633SPhilipp Reisner if (!buffer) 3008cdfda633SPhilipp Reisner goto out; 3009b411b363SPhilipp Reisner 30103fbf4d21SAndreas Gruenbacher if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { 301125985edcSLucas De Marchi /* NOTE: can't do normal error processing here as this is 3012b411b363SPhilipp Reisner called BEFORE disk is attached */ 3013b411b363SPhilipp Reisner dev_err(DEV, "Error while reading metadata.\n"); 3014b411b363SPhilipp Reisner rv = ERR_IO_MD_DISK; 3015b411b363SPhilipp Reisner goto err; 3016b411b363SPhilipp Reisner } 3017b411b363SPhilipp Reisner 3018d5d7ebd4SLars Ellenberg magic = be32_to_cpu(buffer->magic); 3019d5d7ebd4SLars Ellenberg flags = be32_to_cpu(buffer->flags); 3020d5d7ebd4SLars Ellenberg if (magic == DRBD_MD_MAGIC_84_UNCLEAN || 3021d5d7ebd4SLars Ellenberg (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { 3022d5d7ebd4SLars Ellenberg /* btw: that's Activity Log clean, not "all" clean. */ 3023d5d7ebd4SLars Ellenberg dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n"); 3024d5d7ebd4SLars Ellenberg rv = ERR_MD_UNCLEAN; 3025d5d7ebd4SLars Ellenberg goto err; 3026d5d7ebd4SLars Ellenberg } 3027d5d7ebd4SLars Ellenberg if (magic != DRBD_MD_MAGIC_08) { 3028d5d7ebd4SLars Ellenberg if (magic == DRBD_MD_MAGIC_07) 3029d5d7ebd4SLars Ellenberg dev_err(DEV, "Found old (0.7) meta data magic. 
Did you \"drbdadm create-md\"?\n"); 3030d5d7ebd4SLars Ellenberg else 3031d5d7ebd4SLars Ellenberg dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); 3032b411b363SPhilipp Reisner rv = ERR_MD_INVALID; 3033b411b363SPhilipp Reisner goto err; 3034b411b363SPhilipp Reisner } 3035b411b363SPhilipp Reisner if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { 3036b411b363SPhilipp Reisner dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", 3037b411b363SPhilipp Reisner be32_to_cpu(buffer->al_offset), bdev->md.al_offset); 3038b411b363SPhilipp Reisner rv = ERR_MD_INVALID; 3039b411b363SPhilipp Reisner goto err; 3040b411b363SPhilipp Reisner } 3041b411b363SPhilipp Reisner if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { 3042b411b363SPhilipp Reisner dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", 3043b411b363SPhilipp Reisner be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); 3044b411b363SPhilipp Reisner rv = ERR_MD_INVALID; 3045b411b363SPhilipp Reisner goto err; 3046b411b363SPhilipp Reisner } 3047b411b363SPhilipp Reisner if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { 3048b411b363SPhilipp Reisner dev_err(DEV, "unexpected md_size: %u (expected %u)\n", 3049b411b363SPhilipp Reisner be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); 3050b411b363SPhilipp Reisner rv = ERR_MD_INVALID; 3051b411b363SPhilipp Reisner goto err; 3052b411b363SPhilipp Reisner } 3053b411b363SPhilipp Reisner 3054b411b363SPhilipp Reisner if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { 3055b411b363SPhilipp Reisner dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", 3056b411b363SPhilipp Reisner be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); 3057b411b363SPhilipp Reisner rv = ERR_MD_INVALID; 3058b411b363SPhilipp Reisner goto err; 3059b411b363SPhilipp Reisner } 3060b411b363SPhilipp Reisner 3061b411b363SPhilipp Reisner bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); 3062b411b363SPhilipp Reisner for (i 
= UI_CURRENT; i < UI_SIZE; i++) 3063b411b363SPhilipp Reisner bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); 3064b411b363SPhilipp Reisner bdev->md.flags = be32_to_cpu(buffer->flags); 3065b411b363SPhilipp Reisner bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); 3066b411b363SPhilipp Reisner 306787eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 306899432fccSPhilipp Reisner if (mdev->state.conn < C_CONNECTED) { 306999432fccSPhilipp Reisner int peer; 307099432fccSPhilipp Reisner peer = be32_to_cpu(buffer->la_peer_max_bio_size); 307199432fccSPhilipp Reisner peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); 307299432fccSPhilipp Reisner mdev->peer_max_bio_size = peer; 307399432fccSPhilipp Reisner } 307487eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 307599432fccSPhilipp Reisner 3076b411b363SPhilipp Reisner err: 3077cdfda633SPhilipp Reisner drbd_md_put_buffer(mdev); 3078cdfda633SPhilipp Reisner out: 3079b411b363SPhilipp Reisner put_ldev(mdev); 3080b411b363SPhilipp Reisner 3081b411b363SPhilipp Reisner return rv; 3082b411b363SPhilipp Reisner } 3083b411b363SPhilipp Reisner 3084b411b363SPhilipp Reisner /** 3085b411b363SPhilipp Reisner * drbd_md_mark_dirty() - Mark meta data super block as dirty 3086b411b363SPhilipp Reisner * @mdev: DRBD device. 3087b411b363SPhilipp Reisner * 3088b411b363SPhilipp Reisner * Call this function if you change anything that should be written to 3089b411b363SPhilipp Reisner * the meta-data super block. This function sets MD_DIRTY, and starts a 3090b411b363SPhilipp Reisner * timer that ensures that within five seconds you have to call drbd_md_sync(). 
3091b411b363SPhilipp Reisner */ 3092ca0e6098SLars Ellenberg #ifdef DEBUG 3093ee15b038SLars Ellenberg void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func) 3094ee15b038SLars Ellenberg { 3095ee15b038SLars Ellenberg if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) { 3096ee15b038SLars Ellenberg mod_timer(&mdev->md_sync_timer, jiffies + HZ); 3097ee15b038SLars Ellenberg mdev->last_md_mark_dirty.line = line; 3098ee15b038SLars Ellenberg mdev->last_md_mark_dirty.func = func; 3099ee15b038SLars Ellenberg } 3100ee15b038SLars Ellenberg } 3101ee15b038SLars Ellenberg #else 3102b411b363SPhilipp Reisner void drbd_md_mark_dirty(struct drbd_conf *mdev) 3103b411b363SPhilipp Reisner { 3104ee15b038SLars Ellenberg if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) 3105b411b363SPhilipp Reisner mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); 3106b411b363SPhilipp Reisner } 3107ee15b038SLars Ellenberg #endif 3108b411b363SPhilipp Reisner 3109b411b363SPhilipp Reisner static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) 3110b411b363SPhilipp Reisner { 3111b411b363SPhilipp Reisner int i; 3112b411b363SPhilipp Reisner 311362b0da3aSLars Ellenberg for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) 3114b411b363SPhilipp Reisner mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; 3115b411b363SPhilipp Reisner } 3116b411b363SPhilipp Reisner 3117b411b363SPhilipp Reisner void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) 3118b411b363SPhilipp Reisner { 3119b411b363SPhilipp Reisner if (idx == UI_CURRENT) { 3120b411b363SPhilipp Reisner if (mdev->state.role == R_PRIMARY) 3121b411b363SPhilipp Reisner val |= 1; 3122b411b363SPhilipp Reisner else 3123b411b363SPhilipp Reisner val &= ~((u64)1); 3124b411b363SPhilipp Reisner 3125b411b363SPhilipp Reisner drbd_set_ed_uuid(mdev, val); 3126b411b363SPhilipp Reisner } 3127b411b363SPhilipp Reisner 3128b411b363SPhilipp Reisner mdev->ldev->md.uuid[idx] = val; 3129b411b363SPhilipp Reisner 
drbd_md_mark_dirty(mdev); 3130b411b363SPhilipp Reisner } 3131b411b363SPhilipp Reisner 3132b411b363SPhilipp Reisner 3133b411b363SPhilipp Reisner void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) 3134b411b363SPhilipp Reisner { 3135b411b363SPhilipp Reisner if (mdev->ldev->md.uuid[idx]) { 3136b411b363SPhilipp Reisner drbd_uuid_move_history(mdev); 3137b411b363SPhilipp Reisner mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; 3138b411b363SPhilipp Reisner } 3139b411b363SPhilipp Reisner _drbd_uuid_set(mdev, idx, val); 3140b411b363SPhilipp Reisner } 3141b411b363SPhilipp Reisner 3142b411b363SPhilipp Reisner /** 3143b411b363SPhilipp Reisner * drbd_uuid_new_current() - Creates a new current UUID 3144b411b363SPhilipp Reisner * @mdev: DRBD device. 3145b411b363SPhilipp Reisner * 3146b411b363SPhilipp Reisner * Creates a new current UUID, and rotates the old current UUID into 3147b411b363SPhilipp Reisner * the bitmap slot. Causes an incremental resync upon next connect. 
3148b411b363SPhilipp Reisner */ 3149b411b363SPhilipp Reisner void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) 3150b411b363SPhilipp Reisner { 3151b411b363SPhilipp Reisner u64 val; 315262b0da3aSLars Ellenberg unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; 3153b411b363SPhilipp Reisner 315462b0da3aSLars Ellenberg if (bm_uuid) 315562b0da3aSLars Ellenberg dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); 315662b0da3aSLars Ellenberg 3157b411b363SPhilipp Reisner mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; 3158b411b363SPhilipp Reisner 3159b411b363SPhilipp Reisner get_random_bytes(&val, sizeof(u64)); 3160b411b363SPhilipp Reisner _drbd_uuid_set(mdev, UI_CURRENT, val); 316162b0da3aSLars Ellenberg drbd_print_uuids(mdev, "new current UUID"); 3162aaa8e2b3SLars Ellenberg /* get it to stable storage _now_ */ 3163aaa8e2b3SLars Ellenberg drbd_md_sync(mdev); 3164b411b363SPhilipp Reisner } 3165b411b363SPhilipp Reisner 3166b411b363SPhilipp Reisner void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) 3167b411b363SPhilipp Reisner { 3168b411b363SPhilipp Reisner if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) 3169b411b363SPhilipp Reisner return; 3170b411b363SPhilipp Reisner 3171b411b363SPhilipp Reisner if (val == 0) { 3172b411b363SPhilipp Reisner drbd_uuid_move_history(mdev); 3173b411b363SPhilipp Reisner mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; 3174b411b363SPhilipp Reisner mdev->ldev->md.uuid[UI_BITMAP] = 0; 3175b411b363SPhilipp Reisner } else { 317662b0da3aSLars Ellenberg unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; 317762b0da3aSLars Ellenberg if (bm_uuid) 317862b0da3aSLars Ellenberg dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); 3179b411b363SPhilipp Reisner 318062b0da3aSLars Ellenberg mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); 3181b411b363SPhilipp Reisner } 3182b411b363SPhilipp Reisner drbd_md_mark_dirty(mdev); 3183b411b363SPhilipp 
Reisner } 3184b411b363SPhilipp Reisner 3185b411b363SPhilipp Reisner /** 3186b411b363SPhilipp Reisner * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() 3187b411b363SPhilipp Reisner * @mdev: DRBD device. 3188b411b363SPhilipp Reisner * 3189b411b363SPhilipp Reisner * Sets all bits in the bitmap and writes the whole bitmap to stable storage. 3190b411b363SPhilipp Reisner */ 3191b411b363SPhilipp Reisner int drbd_bmio_set_n_write(struct drbd_conf *mdev) 3192b411b363SPhilipp Reisner { 3193b411b363SPhilipp Reisner int rv = -EIO; 3194b411b363SPhilipp Reisner 3195b411b363SPhilipp Reisner if (get_ldev_if_state(mdev, D_ATTACHING)) { 3196b411b363SPhilipp Reisner drbd_md_set_flag(mdev, MDF_FULL_SYNC); 3197b411b363SPhilipp Reisner drbd_md_sync(mdev); 3198b411b363SPhilipp Reisner drbd_bm_set_all(mdev); 3199b411b363SPhilipp Reisner 3200b411b363SPhilipp Reisner rv = drbd_bm_write(mdev); 3201b411b363SPhilipp Reisner 3202b411b363SPhilipp Reisner if (!rv) { 3203b411b363SPhilipp Reisner drbd_md_clear_flag(mdev, MDF_FULL_SYNC); 3204b411b363SPhilipp Reisner drbd_md_sync(mdev); 3205b411b363SPhilipp Reisner } 3206b411b363SPhilipp Reisner 3207b411b363SPhilipp Reisner put_ldev(mdev); 3208b411b363SPhilipp Reisner } 3209b411b363SPhilipp Reisner 3210b411b363SPhilipp Reisner return rv; 3211b411b363SPhilipp Reisner } 3212b411b363SPhilipp Reisner 3213b411b363SPhilipp Reisner /** 3214b411b363SPhilipp Reisner * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() 3215b411b363SPhilipp Reisner * @mdev: DRBD device. 3216b411b363SPhilipp Reisner * 3217b411b363SPhilipp Reisner * Clears all bits in the bitmap and writes the whole bitmap to stable storage. 
3218b411b363SPhilipp Reisner */ 3219b411b363SPhilipp Reisner int drbd_bmio_clear_n_write(struct drbd_conf *mdev) 3220b411b363SPhilipp Reisner { 3221b411b363SPhilipp Reisner int rv = -EIO; 3222b411b363SPhilipp Reisner 32230778286aSPhilipp Reisner drbd_resume_al(mdev); 3224b411b363SPhilipp Reisner if (get_ldev_if_state(mdev, D_ATTACHING)) { 3225b411b363SPhilipp Reisner drbd_bm_clear_all(mdev); 3226b411b363SPhilipp Reisner rv = drbd_bm_write(mdev); 3227b411b363SPhilipp Reisner put_ldev(mdev); 3228b411b363SPhilipp Reisner } 3229b411b363SPhilipp Reisner 3230b411b363SPhilipp Reisner return rv; 3231b411b363SPhilipp Reisner } 3232b411b363SPhilipp Reisner 323399920dc5SAndreas Gruenbacher static int w_bitmap_io(struct drbd_work *w, int unused) 3234b411b363SPhilipp Reisner { 3235b411b363SPhilipp Reisner struct bm_io_work *work = container_of(w, struct bm_io_work, w); 323600d56944SPhilipp Reisner struct drbd_conf *mdev = w->mdev; 323702851e9fSLars Ellenberg int rv = -EIO; 3238b411b363SPhilipp Reisner 3239b411b363SPhilipp Reisner D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); 3240b411b363SPhilipp Reisner 324102851e9fSLars Ellenberg if (get_ldev(mdev)) { 324220ceb2b2SLars Ellenberg drbd_bm_lock(mdev, work->why, work->flags); 3243b411b363SPhilipp Reisner rv = work->io_fn(mdev); 3244b411b363SPhilipp Reisner drbd_bm_unlock(mdev); 324502851e9fSLars Ellenberg put_ldev(mdev); 324602851e9fSLars Ellenberg } 3247b411b363SPhilipp Reisner 32484738fa16SLars Ellenberg clear_bit_unlock(BITMAP_IO, &mdev->flags); 3249b411b363SPhilipp Reisner wake_up(&mdev->misc_wait); 3250b411b363SPhilipp Reisner 3251b411b363SPhilipp Reisner if (work->done) 3252b411b363SPhilipp Reisner work->done(mdev, rv); 3253b411b363SPhilipp Reisner 3254b411b363SPhilipp Reisner clear_bit(BITMAP_IO_QUEUED, &mdev->flags); 3255b411b363SPhilipp Reisner work->why = NULL; 325620ceb2b2SLars Ellenberg work->flags = 0; 3257b411b363SPhilipp Reisner 325899920dc5SAndreas Gruenbacher return 0; 3259b411b363SPhilipp Reisner } 
3260b411b363SPhilipp Reisner 326182f59cc6SLars Ellenberg void drbd_ldev_destroy(struct drbd_conf *mdev) 326282f59cc6SLars Ellenberg { 326382f59cc6SLars Ellenberg lc_destroy(mdev->resync); 326482f59cc6SLars Ellenberg mdev->resync = NULL; 326582f59cc6SLars Ellenberg lc_destroy(mdev->act_log); 326682f59cc6SLars Ellenberg mdev->act_log = NULL; 326782f59cc6SLars Ellenberg __no_warn(local, 326882f59cc6SLars Ellenberg drbd_free_bc(mdev->ldev); 326982f59cc6SLars Ellenberg mdev->ldev = NULL;); 327082f59cc6SLars Ellenberg 327182f59cc6SLars Ellenberg clear_bit(GO_DISKLESS, &mdev->flags); 327282f59cc6SLars Ellenberg } 327382f59cc6SLars Ellenberg 327499920dc5SAndreas Gruenbacher static int w_go_diskless(struct drbd_work *w, int unused) 3275e9e6f3ecSLars Ellenberg { 327600d56944SPhilipp Reisner struct drbd_conf *mdev = w->mdev; 327700d56944SPhilipp Reisner 3278e9e6f3ecSLars Ellenberg D_ASSERT(mdev->state.disk == D_FAILED); 32799d282875SLars Ellenberg /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will 32809d282875SLars Ellenberg * inc/dec it frequently. Once we are D_DISKLESS, no one will touch 328182f59cc6SLars Ellenberg * the protected members anymore, though, so once put_ldev reaches zero 328282f59cc6SLars Ellenberg * again, it will be safe to free them. 
*/ 3283e9e6f3ecSLars Ellenberg drbd_force_state(mdev, NS(disk, D_DISKLESS)); 328499920dc5SAndreas Gruenbacher return 0; 3285e9e6f3ecSLars Ellenberg } 3286e9e6f3ecSLars Ellenberg 3287e9e6f3ecSLars Ellenberg void drbd_go_diskless(struct drbd_conf *mdev) 3288e9e6f3ecSLars Ellenberg { 3289e9e6f3ecSLars Ellenberg D_ASSERT(mdev->state.disk == D_FAILED); 3290e9e6f3ecSLars Ellenberg if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) 3291e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless); 3292e9e6f3ecSLars Ellenberg } 3293e9e6f3ecSLars Ellenberg 3294b411b363SPhilipp Reisner /** 3295b411b363SPhilipp Reisner * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap 3296b411b363SPhilipp Reisner * @mdev: DRBD device. 3297b411b363SPhilipp Reisner * @io_fn: IO callback to be called when bitmap IO is possible 3298b411b363SPhilipp Reisner * @done: callback to be called after the bitmap IO was performed 3299b411b363SPhilipp Reisner * @why: Descriptive text of the reason for doing the IO 3300b411b363SPhilipp Reisner * 3301b411b363SPhilipp Reisner * While IO on the bitmap happens we freeze application IO thus we ensure 3302b411b363SPhilipp Reisner * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be 3303b411b363SPhilipp Reisner * called from worker context. It MUST NOT be used while a previous such 3304b411b363SPhilipp Reisner * work is still pending! 
3305b411b363SPhilipp Reisner */ 3306b411b363SPhilipp Reisner void drbd_queue_bitmap_io(struct drbd_conf *mdev, 3307b411b363SPhilipp Reisner int (*io_fn)(struct drbd_conf *), 3308b411b363SPhilipp Reisner void (*done)(struct drbd_conf *, int), 330920ceb2b2SLars Ellenberg char *why, enum bm_flag flags) 3310b411b363SPhilipp Reisner { 3311e6b3ea83SPhilipp Reisner D_ASSERT(current == mdev->tconn->worker.task); 3312b411b363SPhilipp Reisner 3313b411b363SPhilipp Reisner D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); 3314b411b363SPhilipp Reisner D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); 3315b411b363SPhilipp Reisner D_ASSERT(list_empty(&mdev->bm_io_work.w.list)); 3316b411b363SPhilipp Reisner if (mdev->bm_io_work.why) 3317b411b363SPhilipp Reisner dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n", 3318b411b363SPhilipp Reisner why, mdev->bm_io_work.why); 3319b411b363SPhilipp Reisner 3320b411b363SPhilipp Reisner mdev->bm_io_work.io_fn = io_fn; 3321b411b363SPhilipp Reisner mdev->bm_io_work.done = done; 3322b411b363SPhilipp Reisner mdev->bm_io_work.why = why; 332320ceb2b2SLars Ellenberg mdev->bm_io_work.flags = flags; 3324b411b363SPhilipp Reisner 332587eeee41SPhilipp Reisner spin_lock_irq(&mdev->tconn->req_lock); 3326b411b363SPhilipp Reisner set_bit(BITMAP_IO, &mdev->flags); 3327b411b363SPhilipp Reisner if (atomic_read(&mdev->ap_bio_cnt) == 0) { 3328127b3178SPhilipp Reisner if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) 3329e42325a5SPhilipp Reisner drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); 3330b411b363SPhilipp Reisner } 333187eeee41SPhilipp Reisner spin_unlock_irq(&mdev->tconn->req_lock); 3332b411b363SPhilipp Reisner } 3333b411b363SPhilipp Reisner 3334b411b363SPhilipp Reisner /** 3335b411b363SPhilipp Reisner * drbd_bitmap_io() - Does an IO operation on the whole bitmap 3336b411b363SPhilipp Reisner * @mdev: DRBD device. 
3337b411b363SPhilipp Reisner * @io_fn: IO callback to be called when bitmap IO is possible 3338b411b363SPhilipp Reisner * @why: Descriptive text of the reason for doing the IO 3339b411b363SPhilipp Reisner * 3340b411b363SPhilipp Reisner * freezes application IO while that the actual IO operations runs. This 3341b411b363SPhilipp Reisner * functions MAY NOT be called from worker context. 3342b411b363SPhilipp Reisner */ 334320ceb2b2SLars Ellenberg int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), 334420ceb2b2SLars Ellenberg char *why, enum bm_flag flags) 3345b411b363SPhilipp Reisner { 3346b411b363SPhilipp Reisner int rv; 3347b411b363SPhilipp Reisner 3348e6b3ea83SPhilipp Reisner D_ASSERT(current != mdev->tconn->worker.task); 3349b411b363SPhilipp Reisner 335020ceb2b2SLars Ellenberg if ((flags & BM_LOCKED_SET_ALLOWED) == 0) 3351b411b363SPhilipp Reisner drbd_suspend_io(mdev); 3352b411b363SPhilipp Reisner 335320ceb2b2SLars Ellenberg drbd_bm_lock(mdev, why, flags); 3354b411b363SPhilipp Reisner rv = io_fn(mdev); 3355b411b363SPhilipp Reisner drbd_bm_unlock(mdev); 3356b411b363SPhilipp Reisner 335720ceb2b2SLars Ellenberg if ((flags & BM_LOCKED_SET_ALLOWED) == 0) 3358b411b363SPhilipp Reisner drbd_resume_io(mdev); 3359b411b363SPhilipp Reisner 3360b411b363SPhilipp Reisner return rv; 3361b411b363SPhilipp Reisner } 3362b411b363SPhilipp Reisner 3363b411b363SPhilipp Reisner void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local) 3364b411b363SPhilipp Reisner { 3365b411b363SPhilipp Reisner if ((mdev->ldev->md.flags & flag) != flag) { 3366b411b363SPhilipp Reisner drbd_md_mark_dirty(mdev); 3367b411b363SPhilipp Reisner mdev->ldev->md.flags |= flag; 3368b411b363SPhilipp Reisner } 3369b411b363SPhilipp Reisner } 3370b411b363SPhilipp Reisner 3371b411b363SPhilipp Reisner void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local) 3372b411b363SPhilipp Reisner { 3373b411b363SPhilipp Reisner if ((mdev->ldev->md.flags & flag) != 0) { 
3374b411b363SPhilipp Reisner drbd_md_mark_dirty(mdev); 3375b411b363SPhilipp Reisner mdev->ldev->md.flags &= ~flag; 3376b411b363SPhilipp Reisner } 3377b411b363SPhilipp Reisner } 3378b411b363SPhilipp Reisner int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) 3379b411b363SPhilipp Reisner { 3380b411b363SPhilipp Reisner return (bdev->md.flags & flag) != 0; 3381b411b363SPhilipp Reisner } 3382b411b363SPhilipp Reisner 3383b411b363SPhilipp Reisner static void md_sync_timer_fn(unsigned long data) 3384b411b363SPhilipp Reisner { 3385b411b363SPhilipp Reisner struct drbd_conf *mdev = (struct drbd_conf *) data; 3386b411b363SPhilipp Reisner 3387e42325a5SPhilipp Reisner drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); 3388b411b363SPhilipp Reisner } 3389b411b363SPhilipp Reisner 339099920dc5SAndreas Gruenbacher static int w_md_sync(struct drbd_work *w, int unused) 3391b411b363SPhilipp Reisner { 339200d56944SPhilipp Reisner struct drbd_conf *mdev = w->mdev; 339300d56944SPhilipp Reisner 3394b411b363SPhilipp Reisner dev_warn(DEV, "md_sync_timer expired! 
Worker calls drbd_md_sync().\n"); 3395ee15b038SLars Ellenberg #ifdef DEBUG 3396ee15b038SLars Ellenberg dev_warn(DEV, "last md_mark_dirty: %s:%u\n", 3397ee15b038SLars Ellenberg mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line); 3398ee15b038SLars Ellenberg #endif 3399b411b363SPhilipp Reisner drbd_md_sync(mdev); 340099920dc5SAndreas Gruenbacher return 0; 3401b411b363SPhilipp Reisner } 3402b411b363SPhilipp Reisner 3403d8763023SAndreas Gruenbacher const char *cmdname(enum drbd_packet cmd) 3404f2ad9063SAndreas Gruenbacher { 3405f2ad9063SAndreas Gruenbacher /* THINK may need to become several global tables 3406f2ad9063SAndreas Gruenbacher * when we want to support more than 3407f2ad9063SAndreas Gruenbacher * one PRO_VERSION */ 3408f2ad9063SAndreas Gruenbacher static const char *cmdnames[] = { 3409f2ad9063SAndreas Gruenbacher [P_DATA] = "Data", 3410f2ad9063SAndreas Gruenbacher [P_DATA_REPLY] = "DataReply", 3411f2ad9063SAndreas Gruenbacher [P_RS_DATA_REPLY] = "RSDataReply", 3412f2ad9063SAndreas Gruenbacher [P_BARRIER] = "Barrier", 3413f2ad9063SAndreas Gruenbacher [P_BITMAP] = "ReportBitMap", 3414f2ad9063SAndreas Gruenbacher [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", 3415f2ad9063SAndreas Gruenbacher [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", 3416f2ad9063SAndreas Gruenbacher [P_UNPLUG_REMOTE] = "UnplugRemote", 3417f2ad9063SAndreas Gruenbacher [P_DATA_REQUEST] = "DataRequest", 3418f2ad9063SAndreas Gruenbacher [P_RS_DATA_REQUEST] = "RSDataRequest", 3419f2ad9063SAndreas Gruenbacher [P_SYNC_PARAM] = "SyncParam", 3420f2ad9063SAndreas Gruenbacher [P_SYNC_PARAM89] = "SyncParam89", 3421f2ad9063SAndreas Gruenbacher [P_PROTOCOL] = "ReportProtocol", 3422f2ad9063SAndreas Gruenbacher [P_UUIDS] = "ReportUUIDs", 3423f2ad9063SAndreas Gruenbacher [P_SIZES] = "ReportSizes", 3424f2ad9063SAndreas Gruenbacher [P_STATE] = "ReportState", 3425f2ad9063SAndreas Gruenbacher [P_SYNC_UUID] = "ReportSyncUUID", 3426f2ad9063SAndreas Gruenbacher [P_AUTH_CHALLENGE] = "AuthChallenge", 
3427f2ad9063SAndreas Gruenbacher [P_AUTH_RESPONSE] = "AuthResponse", 3428f2ad9063SAndreas Gruenbacher [P_PING] = "Ping", 3429f2ad9063SAndreas Gruenbacher [P_PING_ACK] = "PingAck", 3430f2ad9063SAndreas Gruenbacher [P_RECV_ACK] = "RecvAck", 3431f2ad9063SAndreas Gruenbacher [P_WRITE_ACK] = "WriteAck", 3432f2ad9063SAndreas Gruenbacher [P_RS_WRITE_ACK] = "RSWriteAck", 34337be8da07SAndreas Gruenbacher [P_DISCARD_WRITE] = "DiscardWrite", 3434f2ad9063SAndreas Gruenbacher [P_NEG_ACK] = "NegAck", 3435f2ad9063SAndreas Gruenbacher [P_NEG_DREPLY] = "NegDReply", 3436f2ad9063SAndreas Gruenbacher [P_NEG_RS_DREPLY] = "NegRSDReply", 3437f2ad9063SAndreas Gruenbacher [P_BARRIER_ACK] = "BarrierAck", 3438f2ad9063SAndreas Gruenbacher [P_STATE_CHG_REQ] = "StateChgRequest", 3439f2ad9063SAndreas Gruenbacher [P_STATE_CHG_REPLY] = "StateChgReply", 3440f2ad9063SAndreas Gruenbacher [P_OV_REQUEST] = "OVRequest", 3441f2ad9063SAndreas Gruenbacher [P_OV_REPLY] = "OVReply", 3442f2ad9063SAndreas Gruenbacher [P_OV_RESULT] = "OVResult", 3443f2ad9063SAndreas Gruenbacher [P_CSUM_RS_REQUEST] = "CsumRSRequest", 3444f2ad9063SAndreas Gruenbacher [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", 3445f2ad9063SAndreas Gruenbacher [P_COMPRESSED_BITMAP] = "CBitmap", 3446f2ad9063SAndreas Gruenbacher [P_DELAY_PROBE] = "DelayProbe", 3447f2ad9063SAndreas Gruenbacher [P_OUT_OF_SYNC] = "OutOfSync", 34487be8da07SAndreas Gruenbacher [P_RETRY_WRITE] = "RetryWrite", 3449ae25b336SLars Ellenberg [P_RS_CANCEL] = "RSCancel", 3450ae25b336SLars Ellenberg [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", 3451ae25b336SLars Ellenberg [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", 3452036b17eaSPhilipp Reisner [P_RETRY_WRITE] = "retry_write", 3453036b17eaSPhilipp Reisner [P_PROTOCOL_UPDATE] = "protocol_update", 3454ae25b336SLars Ellenberg 3455ae25b336SLars Ellenberg /* enum drbd_packet, but not commands - obsoleted flags: 3456ae25b336SLars Ellenberg * P_MAY_IGNORE 3457ae25b336SLars Ellenberg * P_MAX_OPT_CMD 3458ae25b336SLars Ellenberg */ 
3459f2ad9063SAndreas Gruenbacher }; 3460f2ad9063SAndreas Gruenbacher 3461ae25b336SLars Ellenberg /* too big for the array: 0xfffX */ 3462e5d6f33aSAndreas Gruenbacher if (cmd == P_INITIAL_META) 3463e5d6f33aSAndreas Gruenbacher return "InitialMeta"; 3464e5d6f33aSAndreas Gruenbacher if (cmd == P_INITIAL_DATA) 3465e5d6f33aSAndreas Gruenbacher return "InitialData"; 34666038178eSAndreas Gruenbacher if (cmd == P_CONNECTION_FEATURES) 34676038178eSAndreas Gruenbacher return "ConnectionFeatures"; 34686e849ce8SAndreas Gruenbacher if (cmd >= ARRAY_SIZE(cmdnames)) 3469f2ad9063SAndreas Gruenbacher return "Unknown"; 3470f2ad9063SAndreas Gruenbacher return cmdnames[cmd]; 3471f2ad9063SAndreas Gruenbacher } 3472f2ad9063SAndreas Gruenbacher 34737be8da07SAndreas Gruenbacher /** 34747be8da07SAndreas Gruenbacher * drbd_wait_misc - wait for a request to make progress 34757be8da07SAndreas Gruenbacher * @mdev: device associated with the request 34767be8da07SAndreas Gruenbacher * @i: the struct drbd_interval embedded in struct drbd_request or 34777be8da07SAndreas Gruenbacher * struct drbd_peer_request 34787be8da07SAndreas Gruenbacher */ 34797be8da07SAndreas Gruenbacher int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) 34807be8da07SAndreas Gruenbacher { 348144ed167dSPhilipp Reisner struct net_conf *nc; 34827be8da07SAndreas Gruenbacher DEFINE_WAIT(wait); 34837be8da07SAndreas Gruenbacher long timeout; 34847be8da07SAndreas Gruenbacher 348544ed167dSPhilipp Reisner rcu_read_lock(); 348644ed167dSPhilipp Reisner nc = rcu_dereference(mdev->tconn->net_conf); 348744ed167dSPhilipp Reisner if (!nc) { 348844ed167dSPhilipp Reisner rcu_read_unlock(); 34897be8da07SAndreas Gruenbacher return -ETIMEDOUT; 349044ed167dSPhilipp Reisner } 349144ed167dSPhilipp Reisner timeout = nc->ko_count ? 
nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT; 349244ed167dSPhilipp Reisner rcu_read_unlock(); 34937be8da07SAndreas Gruenbacher 34947be8da07SAndreas Gruenbacher /* Indicate to wake up mdev->misc_wait on progress. */ 34957be8da07SAndreas Gruenbacher i->waiting = true; 34967be8da07SAndreas Gruenbacher prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); 34977be8da07SAndreas Gruenbacher spin_unlock_irq(&mdev->tconn->req_lock); 34987be8da07SAndreas Gruenbacher timeout = schedule_timeout(timeout); 34997be8da07SAndreas Gruenbacher finish_wait(&mdev->misc_wait, &wait); 35007be8da07SAndreas Gruenbacher spin_lock_irq(&mdev->tconn->req_lock); 35017be8da07SAndreas Gruenbacher if (!timeout || mdev->state.conn < C_CONNECTED) 35027be8da07SAndreas Gruenbacher return -ETIMEDOUT; 35037be8da07SAndreas Gruenbacher if (signal_pending(current)) 35047be8da07SAndreas Gruenbacher return -ERESTARTSYS; 35057be8da07SAndreas Gruenbacher return 0; 35067be8da07SAndreas Gruenbacher } 35077be8da07SAndreas Gruenbacher 3508b411b363SPhilipp Reisner #ifdef CONFIG_DRBD_FAULT_INJECTION 3509b411b363SPhilipp Reisner /* Fault insertion support including random number generator shamelessly 3510b411b363SPhilipp Reisner * stolen from kernel/rcutorture.c */ 3511b411b363SPhilipp Reisner struct fault_random_state { 3512b411b363SPhilipp Reisner unsigned long state; 3513b411b363SPhilipp Reisner unsigned long count; 3514b411b363SPhilipp Reisner }; 3515b411b363SPhilipp Reisner 3516b411b363SPhilipp Reisner #define FAULT_RANDOM_MULT 39916801 /* prime */ 3517b411b363SPhilipp Reisner #define FAULT_RANDOM_ADD 479001701 /* prime */ 3518b411b363SPhilipp Reisner #define FAULT_RANDOM_REFRESH 10000 3519b411b363SPhilipp Reisner 3520b411b363SPhilipp Reisner /* 3521b411b363SPhilipp Reisner * Crude but fast random-number generator. Uses a linear congruential 3522b411b363SPhilipp Reisner * generator, with occasional help from get_random_bytes(). 
3523b411b363SPhilipp Reisner */ 3524b411b363SPhilipp Reisner static unsigned long 3525b411b363SPhilipp Reisner _drbd_fault_random(struct fault_random_state *rsp) 3526b411b363SPhilipp Reisner { 3527b411b363SPhilipp Reisner long refresh; 3528b411b363SPhilipp Reisner 352949829ea7SRoel Kluin if (!rsp->count--) { 3530b411b363SPhilipp Reisner get_random_bytes(&refresh, sizeof(refresh)); 3531b411b363SPhilipp Reisner rsp->state += refresh; 3532b411b363SPhilipp Reisner rsp->count = FAULT_RANDOM_REFRESH; 3533b411b363SPhilipp Reisner } 3534b411b363SPhilipp Reisner rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD; 3535b411b363SPhilipp Reisner return swahw32(rsp->state); 3536b411b363SPhilipp Reisner } 3537b411b363SPhilipp Reisner 3538b411b363SPhilipp Reisner static char * 3539b411b363SPhilipp Reisner _drbd_fault_str(unsigned int type) { 3540b411b363SPhilipp Reisner static char *_faults[] = { 3541b411b363SPhilipp Reisner [DRBD_FAULT_MD_WR] = "Meta-data write", 3542b411b363SPhilipp Reisner [DRBD_FAULT_MD_RD] = "Meta-data read", 3543b411b363SPhilipp Reisner [DRBD_FAULT_RS_WR] = "Resync write", 3544b411b363SPhilipp Reisner [DRBD_FAULT_RS_RD] = "Resync read", 3545b411b363SPhilipp Reisner [DRBD_FAULT_DT_WR] = "Data write", 3546b411b363SPhilipp Reisner [DRBD_FAULT_DT_RD] = "Data read", 3547b411b363SPhilipp Reisner [DRBD_FAULT_DT_RA] = "Data read ahead", 3548b411b363SPhilipp Reisner [DRBD_FAULT_BM_ALLOC] = "BM allocation", 35496b4388acSPhilipp Reisner [DRBD_FAULT_AL_EE] = "EE allocation", 35506b4388acSPhilipp Reisner [DRBD_FAULT_RECEIVE] = "receive data corruption", 3551b411b363SPhilipp Reisner }; 3552b411b363SPhilipp Reisner 3553b411b363SPhilipp Reisner return (type < DRBD_FAULT_MAX) ? 
_faults[type] : "**Unknown**"; 3554b411b363SPhilipp Reisner } 3555b411b363SPhilipp Reisner 3556b411b363SPhilipp Reisner unsigned int 3557b411b363SPhilipp Reisner _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) 3558b411b363SPhilipp Reisner { 3559b411b363SPhilipp Reisner static struct fault_random_state rrs = {0, 0}; 3560b411b363SPhilipp Reisner 3561b411b363SPhilipp Reisner unsigned int ret = ( 3562b411b363SPhilipp Reisner (fault_devs == 0 || 3563b411b363SPhilipp Reisner ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) && 3564b411b363SPhilipp Reisner (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate)); 3565b411b363SPhilipp Reisner 3566b411b363SPhilipp Reisner if (ret) { 3567b411b363SPhilipp Reisner fault_count++; 3568b411b363SPhilipp Reisner 35697383506cSLars Ellenberg if (__ratelimit(&drbd_ratelimit_state)) 3570b411b363SPhilipp Reisner dev_warn(DEV, "***Simulating %s failure\n", 3571b411b363SPhilipp Reisner _drbd_fault_str(type)); 3572b411b363SPhilipp Reisner } 3573b411b363SPhilipp Reisner 3574b411b363SPhilipp Reisner return ret; 3575b411b363SPhilipp Reisner } 3576b411b363SPhilipp Reisner #endif 3577b411b363SPhilipp Reisner 3578b411b363SPhilipp Reisner const char *drbd_buildtag(void) 3579b411b363SPhilipp Reisner { 3580b411b363SPhilipp Reisner /* DRBD built from external sources has here a reference to the 3581b411b363SPhilipp Reisner git hash of the source code. 
*/ 3582b411b363SPhilipp Reisner 3583b411b363SPhilipp Reisner static char buildtag[38] = "\0uilt-in"; 3584b411b363SPhilipp Reisner 3585b411b363SPhilipp Reisner if (buildtag[0] == 0) { 3586b411b363SPhilipp Reisner #ifdef CONFIG_MODULES 3587b411b363SPhilipp Reisner if (THIS_MODULE != NULL) 3588b411b363SPhilipp Reisner sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); 3589b411b363SPhilipp Reisner else 3590b411b363SPhilipp Reisner #endif 3591b411b363SPhilipp Reisner buildtag[0] = 'b'; 3592b411b363SPhilipp Reisner } 3593b411b363SPhilipp Reisner 3594b411b363SPhilipp Reisner return buildtag; 3595b411b363SPhilipp Reisner } 3596b411b363SPhilipp Reisner 3597b411b363SPhilipp Reisner module_init(drbd_init) 3598b411b363SPhilipp Reisner module_exit(drbd_cleanup) 3599b411b363SPhilipp Reisner 3600b411b363SPhilipp Reisner EXPORT_SYMBOL(drbd_conn_str); 3601b411b363SPhilipp Reisner EXPORT_SYMBOL(drbd_role_str); 3602b411b363SPhilipp Reisner EXPORT_SYMBOL(drbd_disk_str); 3603b411b363SPhilipp Reisner EXPORT_SYMBOL(drbd_set_st_err_str); 3604